diff --git a/.clang-format b/.clang-format index 06c3d164ee..7ddeba8128 100644 --- a/.clang-format +++ b/.clang-format @@ -1,6 +1,6 @@ BasedOnStyle: WebKit Language: Cpp -AlignAfterOpenBracket: false +AlignAfterOpenBracket: AlwaysBreak BreakBeforeBraces: Custom BraceWrapping: AfterClass: true @@ -25,7 +25,7 @@ Standard: Cpp11 PointerAlignment: Middle MaxEmptyLinesToKeep: 2 KeepEmptyLinesAtTheStartOfBlocks: false -AllowShortFunctionsOnASingleLine: Empty +AllowShortFunctionsOnASingleLine: InlineOnly AlwaysBreakTemplateDeclarations: true IndentCaseLabels: true SpaceAfterTemplateKeyword: true diff --git a/.gitignore b/.gitignore index 8359edbabd..585a407476 100644 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,7 @@ /build /build_* +/build-* /docs/build /docs/edit /docs/tools/venv/ diff --git a/.gitmodules b/.gitmodules index 035359e759..7ea7ae94b6 100644 --- a/.gitmodules +++ b/.gitmodules @@ -52,3 +52,6 @@ [submodule "contrib/boost"] path = contrib/boost url = https://github.com/ClickHouse-Extras/boost-extra.git +[submodule "contrib/base64"] + path = contrib/base64 + url = https://github.com/aklomp/base64.git diff --git a/.travis.yml b/.travis.yml.bak similarity index 100% rename from .travis.yml rename to .travis.yml.bak diff --git a/CHANGELOG.md b/CHANGELOG.md index 99d022ea79..4e81432edd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,1112 +1,1125 @@ -## ClickHouse release 18.14.11, 2018-10-29 - -### Bug fixes: - -* Fixed the error `Block structure mismatch in UNION stream: different number of columns` in LIMIT queries. [#2156](https://github.com/yandex/ClickHouse/issues/2156) -* Fixed errors when merging data in tables containing arrays inside Nested structures. [#3397](https://github.com/yandex/ClickHouse/pull/3397) -* Fixed incorrect query results if the `merge_tree_uniform_read_distribution` setting is disabled (it is enabled by default). [#3429](https://github.com/yandex/ClickHouse/pull/3429) -* Fixed an error on inserts to a Distributed table in Native format. [#3411](https://github.com/yandex/ClickHouse/issues/3411) - -## ClickHouse release 18.14.10, 2018-10-23 - -* The `compile_expressions` setting (JIT compilation of expressions) is disabled by default. [#3410](https://github.com/yandex/ClickHouse/pull/3410) -* The `enable_optimize_predicate_expression` setting is disabled by default. - -## ClickHouse release 18.14.9, 2018-10-16 - -### New features: - -* The `WITH CUBE` modifier for `GROUP BY` (the alternative syntax `GROUP BY CUBE(...)` is also available). [#3172](https://github.com/yandex/ClickHouse/pull/3172) -* Added the `formatDateTime` function. [Alexandr Krasheninnikov](https://github.com/yandex/ClickHouse/pull/2770) -* Added the `JDBC` table engine and `jdbc` table function (requires installing clickhouse-jdbc-bridge). [Alexandr Krasheninnikov](https://github.com/yandex/ClickHouse/pull/3210) -* Added functions for working with the ISO week number: `toISOWeek`, `toISOYear`, `toStartOfISOYear`, and `toDayOfYear`. [#3146](https://github.com/yandex/ClickHouse/pull/3146) -* Now you can use `Nullable` columns for `MySQL` and `ODBC` tables. [#3362](https://github.com/yandex/ClickHouse/pull/3362) -* Nested data structures can be read as nested objects in `JSONEachRow` format. Added the `input_format_import_nested_json` setting. [Veloman Yunkan](https://github.com/yandex/ClickHouse/pull/3144) -* Parallel processing is available for many `MATERIALIZED VIEW`s when inserting data. See the `parallel_view_processing` setting. 
[Marek Vavruša](https://github.com/yandex/ClickHouse/pull/3208) -* Added the `SYSTEM FLUSH LOGS` query (forced log flushes to system tables such as `query_log`) [#3321](https://github.com/yandex/ClickHouse/pull/3321) -* Now you can use pre-defined `database` and `table` macros when declaring `Replicated` tables. [#3251](https://github.com/yandex/ClickHouse/pull/3251) -* Added the ability to read `Decimal` type values in engineering notation (indicating powers of ten). [#3153](https://github.com/yandex/ClickHouse/pull/3153) - -### Experimental features: - -* Optimization of the GROUP BY clause for `LowCardinality data types.` [#3138](https://github.com/yandex/ClickHouse/pull/3138) -* Optimized calculation of expressions for `LowCardinality data types.` [#3200](https://github.com/yandex/ClickHouse/pull/3200) - -### Improvements: - -* Significantly reduced memory consumption for requests with `ORDER BY` and `LIMIT`. See the `max_bytes_before_remerge_sort` setting. [#3205](https://github.com/yandex/ClickHouse/pull/3205) -* In the absence of `JOIN` (`LEFT`, `INNER`, ...), `INNER JOIN` is assumed. [#3147](https://github.com/yandex/ClickHouse/pull/3147) -* Qualified asterisks work correctly in queries with `JOIN`. [Winter Zhang](https://github.com/yandex/ClickHouse/pull/3202) -* The `ODBC` table engine correctly chooses the method for quoting identifiers in the SQL dialect of a remote database. [Alexandr Krasheninnikov](https://github.com/yandex/ClickHouse/pull/3210) -* The `compile_expressions` setting (JIT compilation of expressions) is enabled by default. -* Fixed behavior for simultaneous DROP DATABASE/TABLE IF EXISTS and CREATE DATABASE/TABLE IF NOT EXISTS. Previously, a `CREATE DATABASE ... IF NOT EXISTS` query could return the error message "File ... already exists", and the `CREATE TABLE ... IF NOT EXISTS` and `DROP TABLE IF EXISTS` queries could return `Table ... is creating or attaching right now`. [#3101](https://github.com/yandex/ClickHouse/pull/3101) -* LIKE and IN expressions with a constant right half are passed to the remote server when querying from MySQL or ODBC tables. [#3182](https://github.com/yandex/ClickHouse/pull/3182) -* Comparisons with constant expressions in a WHERE clause are passed to the remote server when querying from MySQL and ODBC tables. Previously, only comparisons with constants were passed. [#3182](https://github.com/yandex/ClickHouse/pull/3182) -* Correct calculation of row width in the terminal for `Pretty` formats, including strings with hieroglyphs. [Amos Bird](https://github.com/yandex/ClickHouse/pull/3257). -* `ON CLUSTER` can be specified for `ALTER UPDATE` queries. -* Improved performance for reading data in `JSONEachRow` format. [#3332](https://github.com/yandex/ClickHouse/pull/3332) -* Added synonyms for the `LENGTH` and `CHARACTER_LENGTH` functions for compatibility. The `CONCAT` function is no longer case-sensitive. [#3306](https://github.com/yandex/ClickHouse/pull/3306) -* Added the `TIMESTAMP` synonym for the `DateTime` type. [#3390](https://github.com/yandex/ClickHouse/pull/3390) -* There is always space reserved for query_id in the server logs, even if the log line is not related to a query. This makes it easier to parse server text logs with third-party tools. -* Memory consumption by a query is logged when it exceeds the next level of an integer number of gigabytes. 
[#3205](https://github.com/yandex/ClickHouse/pull/3205) -* Added compatibility mode for the case when the client library that uses the Native protocol sends fewer columns by mistake than the server expects for the INSERT query. This scenario was possible when using the clickhouse-cpp library. Previously, this scenario caused the server to crash. [#3171](https://github.com/yandex/ClickHouse/pull/3171) -* In a user-defined WHERE expression in `clickhouse-copier`, you can now use a `partition_key` alias (for additional filtering by source table partition). This is useful if the partitioning scheme changes during copying, but only changes slightly. [#3166](https://github.com/yandex/ClickHouse/pull/3166) -* The workflow of the `Kafka` engine has been moved to a background thread pool in order to automatically reduce the speed of data reading at high loads. [Marek Vavruša](https://github.com/yandex/ClickHouse/pull/3215). -* Support for reading `Tuple` and `Nested` values of structures like `struct` in the `Cap'n'Proto format`. [Marek Vavruša](https://github.com/yandex/ClickHouse/pull/3216) -* The list of top-level domains for the `firstSignificantSubdomain` function now includes the domain `biz`. [decaseal](https://github.com/yandex/ClickHouse/pull/3219) -* In the configuration of external dictionaries, `null_value` is interpreted as the value of the default data type. [#3330](https://github.com/yandex/ClickHouse/pull/3330) -* Support for the `intDiv` and `intDivOrZero` functions for `Decimal`. [b48402e8](https://github.com/yandex/ClickHouse/commit/b48402e8712e2b9b151e0eef8193811d433a1264) -* Support for the `Date`, `DateTime`, `UUID`, and `Decimal` types as a key for the `sumMap` aggregate function. [#3281](https://github.com/yandex/ClickHouse/pull/3281) -* Support for the `Decimal` data type in external dictionaries. [#3324](https://github.com/yandex/ClickHouse/pull/3324) -* Support for the `Decimal` data type in `SummingMergeTree` tables. [#3348](https://github.com/yandex/ClickHouse/pull/3348) -* Added specializations for `UUID` in `if`. [#3366](https://github.com/yandex/ClickHouse/pull/3366) -* Reduced the number of `open` and `close` system calls when reading from a `MergeTree table`. [#3283](https://github.com/yandex/ClickHouse/pull/3283) -* A `TRUNCATE TABLE` query can be executed on any replica (the query is passed to the leader replica). [Kirill Shvakov](https://github.com/yandex/ClickHouse/pull/3375) - -### Bug fixes: - -* Fixed an issue with `Dictionary` tables for `range_hashed` dictionaries. This error occurred in version 18.12.17. [#1702](https://github.com/yandex/ClickHouse/pull/1702) -* Fixed an error when loading `range_hashed` dictionaries (the message `Unsupported type Nullable (...)`). This error occurred in version 18.12.17. [#3362](https://github.com/yandex/ClickHouse/pull/3362) -* Fixed errors in the `pointInPolygon` function due to the accumulation of inaccurate calculations for polygons with a large number of vertices located close to each other. [#3331](https://github.com/yandex/ClickHouse/pull/3331) [#3341](https://github.com/yandex/ClickHouse/pull/3341) -* If after merging data parts, the checksum for the resulting part differs from the result of the same merge in another replica, the result of the merge is deleted and the data part is downloaded from the other replica (this is the correct behavior). 
But after downloading the data part, it couldn't be added to the working set because of an error that the part already exists (because the data part was deleted with some delay after the merge). This led to cyclical attempts to download the same data. [#3194](https://github.com/yandex/ClickHouse/pull/3194) -* Fixed incorrect calculation of total memory consumption by queries (because of incorrect calculation, the `max_memory_usage_for_all_queries` setting worked incorrectly and the `MemoryTracking` metric had an incorrect value). This error occurred in version 18.12.13. [Marek Vavruša](https://github.com/yandex/ClickHouse/pull/3344) -* Fixed the functionality of `CREATE TABLE ... ON CLUSTER ... AS SELECT ...` This error occurred in version 18.12.13. [#3247](https://github.com/yandex/ClickHouse/pull/3247) -* Fixed unnecessary preparation of data structures for `JOIN`s on the server that initiates the request if the `JOIN` is only performed on remote servers. [#3340](https://github.com/yandex/ClickHouse/pull/3340) -* Fixed bugs in the `Kafka` engine: deadlocks after exceptions when starting to read data, and locks upon completion [Marek Vavruša](https://github.com/yandex/ClickHouse/pull/3215). -* For `Kafka` tables, the optional `schema` parameter was not passed (the schema of the `Cap'n'Proto` format). [Vojtech Splichal](https://github.com/yandex/ClickHouse/pull/3150) -* If the ensemble of ZooKeeper servers has servers that accept the connection but then immediately close it instead of responding to the handshake, ClickHouse chooses to connect another server. Previously, this produced the error `Cannot read all data. Bytes read: 0. Bytes expected: 4.` and the server couldn't start. [8218cf3a](https://github.com/yandex/ClickHouse/commit/8218cf3a5f39a43401953769d6d12a0bb8d29da9) -* If the ensemble of ZooKeeper servers contains servers for which the DNS query returns an error, these servers are ignored. [17b8e209](https://github.com/yandex/ClickHouse/commit/17b8e209221061325ad7ba0539f03c6e65f87f29) -* Fixed type conversion between `Date` and `DateTime` when inserting data in the `VALUES` format (if `input_format_values_interpret_expressions = 1`). Previously, the conversion was performed between the numerical value of the number of days in Unix Epoch time and the Unix timestamp, which led to unexpected results. [#3229](https://github.com/yandex/ClickHouse/pull/3229) -* Corrected type conversion between `Decimal` and integer numbers. [#3211](https://github.com/yandex/ClickHouse/pull/3211) -* Fixed errors in the `enable_optimize_predicate_expression` setting. [Winter Zhang](https://github.com/yandex/ClickHouse/pull/3231) -* Fixed a parsing error in CSV format with floating-point numbers if a non-default CSV separator is used, such as `;` [#3155](https://github.com/yandex/ClickHouse/pull/3155) -* Fixed the `arrayCumSumNonNegative` function (it does not accumulate negative values if the accumulator is less than zero). [Aleksey Studnev](https://github.com/yandex/ClickHouse/pull/3163) -* Fixed how `Merge` tables work on top of `Distributed` tables when using `PREWHERE`. [#3165](https://github.com/yandex/ClickHouse/pull/3165) -* Bug fixes in the `ALTER UPDATE` query. -* Fixed bugs in the `odbc` table function that appeared in version 18.12. [#3197](https://github.com/yandex/ClickHouse/pull/3197) -* Fixed the operation of aggregate functions with `StateArray` combinators. [#3188](https://github.com/yandex/ClickHouse/pull/3188) -* Fixed a crash when dividing a `Decimal` value by zero. 
[69dd6609](https://github.com/yandex/ClickHouse/commit/69dd6609193beb4e7acd3e6ad216eca0ccfb8179) -* Fixed output of types for operations using `Decimal` and integer arguments. [#3224](https://github.com/yandex/ClickHouse/pull/3224) -* Fixed the segfault during `GROUP BY` on `Decimal128`. [3359ba06](https://github.com/yandex/ClickHouse/commit/3359ba06c39fcd05bfdb87d6c64154819621e13a) -* The `log_query_threads` setting (logging information about each thread of query execution) now takes effect only if the `log_queries` option (logging information about queries) is set to 1. Since the `log_query_threads` option is enabled by default, information about threads was previously logged even if query logging was disabled. [#3241](https://github.com/yandex/ClickHouse/pull/3241) -* Fixed an error in the distributed operation of the quantiles aggregate function (the error message `Not found column quantile...`). [292a8855](https://github.com/yandex/ClickHouse/commit/292a885533b8e3b41ce8993867069d14cbd5a664) -* Fixed the compatibility problem when working on a cluster of version 18.12.17 servers and older servers at the same time. For distributed queries with GROUP BY keys of both fixed and non-fixed length, if there was a large amount of data to aggregate, the returned data was not always fully aggregated (two different rows contained the same aggregation keys). [#3254](https://github.com/yandex/ClickHouse/pull/3254) -* Fixed handling of substitutions in `clickhouse-performance-test`, if the query contains only part of the substitutions declared in the test. [#3263](https://github.com/yandex/ClickHouse/pull/3263) -* Fixed an error when using `FINAL` with `PREWHERE`. [#3298](https://github.com/yandex/ClickHouse/pull/3298) -* Fixed an error when using `PREWHERE` over columns that were added during `ALTER`. [#3298](https://github.com/yandex/ClickHouse/pull/3298) -* Added a check for the absence of `arrayJoin` for `DEFAULT` and `MATERIALIZED` expressions. Previously, `arrayJoin` led to an error when inserting data. [#3337](https://github.com/yandex/ClickHouse/pull/3337) -* Added a check for the absence of `arrayJoin` in a `PREWHERE` clause. Previously, this led to messages like `Size ... doesn't match` or `Unknown compression method` when executing queries. [#3357](https://github.com/yandex/ClickHouse/pull/3357) -* Fixed segfault that could occur in rare cases after optimization that replaced AND chains from equality evaluations with the corresponding IN expression. [liuyimin-bytedance](https://github.com/yandex/ClickHouse/pull/3339) -* Minor corrections to `clickhouse-benchmark`: previously, client information was not sent to the server; now the number of queries executed is calculated more accurately when shutting down and for limiting the number of iterations. [#3351](https://github.com/yandex/ClickHouse/pull/3351) [#3352](https://github.com/yandex/ClickHouse/pull/3352) - -### Backward incompatible changes: - -* Removed the `allow_experimental_decimal_type` option. The `Decimal` data type is available for default use. [#3329](https://github.com/yandex/ClickHouse/pull/3329) - -## ClickHouse release 18.12.17, 2018-09-16 - -### New features: - -* `invalidate_query` (the ability to specify a query to check whether an external dictionary needs to be updated) is implemented for the `clickhouse` source. 
[#3126](https://github.com/yandex/ClickHouse/pull/3126) -* Added the ability to use `UInt*`, `Int*`, and `DateTime` data types (along with the `Date` type) as a `range_hashed` external dictionary key that defines the boundaries of ranges. Now `NULL` can be used to designate an open range. [Vasily Nemkov](https://github.com/yandex/ClickHouse/pull/3123) -* The `Decimal` type now supports `var*` and `stddev*` aggregate functions. [#3129](https://github.com/yandex/ClickHouse/pull/3129) -* The `Decimal` type now supports mathematical functions (`exp`, `sin` and so on.) [#3129](https://github.com/yandex/ClickHouse/pull/3129) -* The `system.part_log` table now has the `partition_id` column. [#3089](https://github.com/yandex/ClickHouse/pull/3089) - -### Bug fixes: - -* `Merge` now works correctly on `Distributed` tables. [Winter Zhang](https://github.com/yandex/ClickHouse/pull/3159) -* Fixed incompatibility (unnecessary dependency on the `glibc` version) that made it impossible to run ClickHouse on `Ubuntu Precise` and older versions. The incompatibility arose in version 18.12.13. [#3130](https://github.com/yandex/ClickHouse/pull/3130) -* Fixed errors in the `enable_optimize_predicate_expression` setting. [Winter Zhang](https://github.com/yandex/ClickHouse/pull/3107) -* Fixed a minor issue with backwards compatibility that appeared when working with a cluster of replicas on versions earlier than 18.12.13 and simultaneously creating a new replica of a table on a server with a newer version (shown in the message `Can not clone replica, because the ... updated to new ClickHouse version`, which is logical, but shouldn't happen). [#3122](https://github.com/yandex/ClickHouse/pull/3122) - -### Backward incompatible changes: - -* The `enable_optimize_predicate_expression` option is enabled by default (which is rather optimistic). If query analysis errors occur that are related to searching for the column names, set `enable_optimize_predicate_expression` to 0. [Winter Zhang](https://github.com/yandex/ClickHouse/pull/3107) - -## ClickHouse release 18.12.14, 2018-09-13 - -### New features: - -* Added support for `ALTER UPDATE` queries. [#3035](https://github.com/yandex/ClickHouse/pull/3035) -* Added the `allow_ddl` option, which restricts the user's access to DDL queries. [#3104](https://github.com/yandex/ClickHouse/pull/3104) -* Added the `min_merge_bytes_to_use_direct_io` option for `MergeTree` engines, which allows you to set a threshold for the total size of the merge (when above the threshold, data part files will be handled using O_DIRECT). [#3117](https://github.com/yandex/ClickHouse/pull/3117) -* The `system.merges` system table now contains the `partition_id` column. [#3099](https://github.com/yandex/ClickHouse/pull/3099) - -### Improvements - -* If a data part remains unchanged during mutation, it isn't downloaded by replicas. [#3103](https://github.com/yandex/ClickHouse/pull/3103) -* Autocomplete is available for names of settings when working with `clickhouse-client`. [#3106](https://github.com/yandex/ClickHouse/pull/3106) - -### Bug fixes: - -* Added a check for the sizes of arrays that are elements of `Nested` type fields when inserting. [#3118](https://github.com/yandex/ClickHouse/pull/3118) -* Fixed an error updating external dictionaries with the `ODBC` source and `hashed` storage. This error occurred in version 18.12.13. -* Fixed a crash when creating a temporary table from a query with an `IN` condition. 
[Winter Zhang](https://github.com/yandex/ClickHouse/pull/3098) -* Fixed an error in aggregate functions for arrays that can have `NULL` elements. [Winter Zhang](https://github.com/yandex/ClickHouse/pull/3097) - - -## ClickHouse release 18.12.13, 2018-09-10 - -### New features: - -* Added the `DECIMAL(digits, scale)` data type (`Decimal32(scale)`, `Decimal64(scale)`, `Decimal128(scale)`). To enable it, use the setting `allow_experimental_decimal_type`. [#2846](https://github.com/yandex/ClickHouse/pull/2846) [#2970](https://github.com/yandex/ClickHouse/pull/2970) [#3008](https://github.com/yandex/ClickHouse/pull/3008) [#3047](https://github.com/yandex/ClickHouse/pull/3047) -* New `WITH ROLLUP` modifier for `GROUP BY` (alternative syntax: `GROUP BY ROLLUP(...)`). [#2948](https://github.com/yandex/ClickHouse/pull/2948) -* In requests with JOIN, the star character expands to a list of columns in all tables, in compliance with the SQL standard. You can restore the old behavior by setting `asterisk_left_columns_only` to 1 on the user configuration level. [Winter Zhang](https://github.com/yandex/ClickHouse/pull/2787) -* Added support for JOIN with table functions. [Winter Zhang](https://github.com/yandex/ClickHouse/pull/2907) -* Autocomplete by pressing Tab in clickhouse-client. [Sergey Shcherbin](https://github.com/yandex/ClickHouse/pull/2447) -* Ctrl+C in clickhouse-client clears a query that was entered. [#2877](https://github.com/yandex/ClickHouse/pull/2877) -* Added the `join_default_strictness` setting (values: `"`, `'any'`, `'all'`). This allows you to not specify `ANY` or `ALL` for `JOIN`. [#2982](https://github.com/yandex/ClickHouse/pull/2982) -* Each line of the server log related to query processing shows the query ID. [#2482](https://github.com/yandex/ClickHouse/pull/2482) -* Now you can get query execution logs in clickhouse-client (use the `send_logs_level` setting). With distributed query processing, logs are cascaded from all the servers. [#2482](https://github.com/yandex/ClickHouse/pull/2482) -* The `system.query_log` and `system.processes` (`SHOW PROCESSLIST`) tables now have information about all changed settings when you run a query (the nested structure of the `Settings` data). Added the `log_query_settings` setting. [#2482](https://github.com/yandex/ClickHouse/pull/2482) -* The `system.query_log` and `system.processes` tables now show information about the number of threads that are participating in query execution (see the `thread_numbers` column). [#2482](https://github.com/yandex/ClickHouse/pull/2482) -* Added `ProfileEvents` counters that measure the time spent on reading and writing over the network and reading and writing to disk, the number of network errors, and the time spent waiting when network bandwidth is limited. [#2482](https://github.com/yandex/ClickHouse/pull/2482) -* Added `ProfileEvents`counters that contain the system metrics from rusage (you can use them to get information about CPU usage in userspace and the kernel, page faults, and context switches), as well as taskstats metrics (use these to obtain information about I/O wait time, CPU wait time, and the amount of data read and recorded, both with and without page cache). [#2482](https://github.com/yandex/ClickHouse/pull/2482) -* The `ProfileEvents` counters are applied globally and for each query, as well as for each query execution thread, which allows you to profile resource consumption by query in detail. 
[#2482](https://github.com/yandex/ClickHouse/pull/2482) -* Added the `system.query_thread_log` table, which contains information about each query execution thread. Added the `log_query_threads` setting. [#2482](https://github.com/yandex/ClickHouse/pull/2482) -* The `system.metrics` and `system.events` tables now have built-in documentation. [#3016](https://github.com/yandex/ClickHouse/pull/3016) -* Added the `arrayEnumerateDense` function. [Amos Bird](https://github.com/yandex/ClickHouse/pull/2975) -* Added the `arrayCumSumNonNegative` and `arrayDifference` functions. [Aleksey Studnev](https://github.com/yandex/ClickHouse/pull/2942) -* Added the `retention` aggregate function. [Sundy Li](https://github.com/yandex/ClickHouse/pull/2887) -* Now you can add (merge) states of aggregate functions by using the plus operator, and multiply the states of aggregate functions by a nonnegative constant. [#3062](https://github.com/yandex/ClickHouse/pull/3062) [#3034](https://github.com/yandex/ClickHouse/pull/3034) -* Tables in the MergeTree family now have the virtual column `_partition_id`. [#3089](https://github.com/yandex/ClickHouse/pull/3089) - -### Experimental features: - -* Added the `LowCardinality(T)` data type. This data type automatically creates a local dictionary of values and allows data processing without unpacking the dictionary. [#2830](https://github.com/yandex/ClickHouse/pull/2830) -* Added a cache of JIT-compiled functions and a counter for the number of uses before compiling. To JIT compile expressions, enable the `compile_expressions` setting. [#2990](https://github.com/yandex/ClickHouse/pull/2990) [#3077](https://github.com/yandex/ClickHouse/pull/3077) - -### Improvements: - -* Fixed the problem with unlimited accumulation of the replication log when there are abandoned replicas. Added an effective recovery mode for replicas with a long lag. -* Improved performance of `GROUP BY` with multiple aggregation fields when one of them is a string and the others are fixed length. -* Improved performance when using `PREWHERE` and with implicit transfer of expressions in `PREWHERE`. -* Improved parsing performance for text formats (`CSV`, `TSV`). [Amos Bird](https://github.com/yandex/ClickHouse/pull/2977) [#2980](https://github.com/yandex/ClickHouse/pull/2980) -* Improved performance of reading strings and arrays in binary formats. [Amos Bird](https://github.com/yandex/ClickHouse/pull/2955) -* Increased performance and reduced memory consumption for queries to `system.tables` and `system.columns` when there is a very large number of tables on a single server. [#2953](https://github.com/yandex/ClickHouse/pull/2953) -* Fixed a performance problem in the case of a large stream of queries that result in an error (the `_dl_addr` function is visible in `perf top`, but the server isn't using much CPU). [#2938](https://github.com/yandex/ClickHouse/pull/2938) -* Conditions are pushed down into the View (when `enable_optimize_predicate_expression` is enabled). [Winter Zhang](https://github.com/yandex/ClickHouse/pull/2907) -* Improvements to the functionality for the `UUID` data type. [#3074](https://github.com/yandex/ClickHouse/pull/3074) [#2985](https://github.com/yandex/ClickHouse/pull/2985) -* The `UUID` data type is supported in external dictionaries (The-Alchemist). [#2822](https://github.com/yandex/ClickHouse/pull/2822) -* The `visitParamExtractRaw` function works correctly with nested structures.
[Winter Zhang](https://github.com/yandex/ClickHouse/pull/2974) -* When the `input_format_skip_unknown_fields` setting is enabled, object fields in `JSONEachRow` format are skipped correctly. [BlahGeek](https://github.com/yandex/ClickHouse/pull/2958) -* For a `CASE` expression with conditions, you can now omit `ELSE`, which is equivalent to `ELSE NULL`. [#2920](https://github.com/yandex/ClickHouse/pull/2920) -* The operation timeout can now be configured when working with ZooKeeper. [urykhy](https://github.com/yandex/ClickHouse/pull/2971) -* You can specify an offset for `LIMIT n, m` as `LIMIT n OFFSET m`. [#2840](https://github.com/yandex/ClickHouse/pull/2840) -* You can use the `SELECT TOP n` syntax as an alternative for `LIMIT`. [#2840](https://github.com/yandex/ClickHouse/pull/2840) -* Increased the size of the queue to write to system tables, so the `SystemLog parameter queue is full` error doesn't happen as often. -* The `windowFunnel` aggregate function now supports events that meet multiple conditions. [Amos Bird](https://github.com/yandex/ClickHouse/pull/2801) -* Duplicate columns can be used in a `USING` clause for `JOIN`. [#3006](https://github.com/yandex/ClickHouse/pull/3006) -* `Pretty` formats now have a limit on column alignment by width. Use the `output_format_pretty_max_column_pad_width` setting. If a value is wider, it will still be displayed in its entirety, but the other cells in the table will not be too wide. [#3003](https://github.com/yandex/ClickHouse/pull/3003) -* The `odbc` table function now allows you to specify the database/schema name. [Amos Bird](https://github.com/yandex/ClickHouse/pull/2885) -* Added the ability to use a username specified in the `clickhouse-client` config file. [Vladimir Kozbin](https://github.com/yandex/ClickHouse/pull/2909) -* The `ZooKeeperExceptions` counter has been split into three counters: `ZooKeeperUserExceptions`, `ZooKeeperHardwareExceptions`, and `ZooKeeperOtherExceptions`. -* `ALTER DELETE` queries work for materialized views. -* Added randomization when running the cleanup thread periodically for `ReplicatedMergeTree` tables in order to avoid periodic load spikes when there are a very large number of `ReplicatedMergeTree` tables. -* Support for `ATTACH TABLE ... ON CLUSTER` queries. [#3025](https://github.com/yandex/ClickHouse/pull/3025) - -### Bug fixes: - -* Fixed an issue with `Dictionary` tables (throws the `Size of offsets doesn't match size of column` or `Unknown compression method` exception). This bug appeared in version 18.10.3. [#2913](https://github.com/yandex/ClickHouse/issues/2913) -* Fixed a bug when merging `CollapsingMergeTree` tables if one of the data parts is empty (these parts are formed during merge or `ALTER DELETE` if all data was deleted), and the `vertical` algorithm was used for the merge. [#3049](https://github.com/yandex/ClickHouse/pull/3049) -* Fixed a race condition during `DROP` or `TRUNCATE` for `Memory` tables with a simultaneous `SELECT`, which could lead to server crashes. This bug appeared in version 1.1.54388. [#3038](https://github.com/yandex/ClickHouse/pull/3038) -* Fixed the possibility of data loss when inserting in `Replicated` tables if the `Session is expired` error is returned (data loss can be detected by the `ReplicatedDataLoss` metric). This error occurred in version 1.1.54378. [#2939](https://github.com/yandex/ClickHouse/pull/2939) [#2949](https://github.com/yandex/ClickHouse/pull/2949) [#2964](https://github.com/yandex/ClickHouse/pull/2964) -* Fixed a segfault during `JOIN ... 
ON`. [#3000](https://github.com/yandex/ClickHouse/pull/3000) -* Fixed the error searching column names when the `WHERE` expression consists entirely of a qualified column name, such as `WHERE table.column`. [#2994](https://github.com/yandex/ClickHouse/pull/2994) -* Fixed the "Not found column" error that occurred when executing distributed queries if a single column consisting of an IN expression with a subquery is requested from a remote server. [#3087](https://github.com/yandex/ClickHouse/pull/3087) -* Fixed the `Block structure mismatch in UNION stream: different number of columns` error that occurred for distributed queries if one of the shards is local and the other is not, and optimization of the move to `PREWHERE` is triggered. [#2226](https://github.com/yandex/ClickHouse/pull/2226) [#3037](https://github.com/yandex/ClickHouse/pull/3037) [#3055](https://github.com/yandex/ClickHouse/pull/3055) [#3065](https://github.com/yandex/ClickHouse/pull/3065) [#3073](https://github.com/yandex/ClickHouse/pull/3073) [#3090](https://github.com/yandex/ClickHouse/pull/3090) [#3093](https://github.com/yandex/ClickHouse/pull/3093) -* Fixed the `pointInPolygon` function for certain cases of non-convex polygons. [#2910](https://github.com/yandex/ClickHouse/pull/2910) -* Fixed the incorrect result when comparing `nan` with integers. [#3024](https://github.com/yandex/ClickHouse/pull/3024) -* Fixed an error in the `zlib-ng` library that could lead to segfault in rare cases. [#2854](https://github.com/yandex/ClickHouse/pull/2854) -* Fixed a memory leak when inserting into a table with `AggregateFunction` columns, if the state of the aggregate function is not simple (allocates memory separately), and if a single insertion request results in multiple small blocks. [#3084](https://github.com/yandex/ClickHouse/pull/3084) -* Fixed a race condition when creating and deleting the same `Buffer` or `MergeTree` table simultaneously. -* Fixed the possibility of a segfault when comparing tuples made up of certain non-trivial types, such as tuples. [#2989](https://github.com/yandex/ClickHouse/pull/2989) -* Fixed the possibility of a segfault when running certain `ON CLUSTER` queries. [Winter Zhang](https://github.com/yandex/ClickHouse/pull/2960) -* Fixed an error in the `arrayDistinct` function for `Nullable` array elements. [#2845](https://github.com/yandex/ClickHouse/pull/2845) [#2937](https://github.com/yandex/ClickHouse/pull/2937) -* The `enable_optimize_predicate_expression` option now correctly supports cases with `SELECT *`. [Winter Zhang](https://github.com/yandex/ClickHouse/pull/2929) -* Fixed the segfault when re-initializing the ZooKeeper session. [#2917](https://github.com/yandex/ClickHouse/pull/2917) -* Fixed potential blocking when working with ZooKeeper. -* Fixed incorrect code for adding nested data structures in a `SummingMergeTree`. -* When allocating memory for states of aggregate functions, alignment is correctly taken into account, which makes it possible to use operations that require alignment when implementing states of aggregate functions. [chenxing-xc](https://github.com/yandex/ClickHouse/pull/2808) - -### Security fix: - -* Safe use of ODBC data sources. Interaction with ODBC drivers uses a separate `clickhouse-odbc-bridge` process. Errors in third-party ODBC drivers no longer cause problems with server stability or vulnerabilities. 
[#2828](https://github.com/yandex/ClickHouse/pull/2828) [#2879](https://github.com/yandex/ClickHouse/pull/2879) [#2886](https://github.com/yandex/ClickHouse/pull/2886) [#2893](https://github.com/yandex/ClickHouse/pull/2893) [#2921](https://github.com/yandex/ClickHouse/pull/2921) -* Fixed incorrect validation of the file path in the `catBoostPool` table function. [#2894](https://github.com/yandex/ClickHouse/pull/2894) -* The contents of system tables (`tables`, `databases`, `parts`, `columns`, `parts_columns`, `merges`, `mutations`, `replicas`, and `replication_queue`) are filtered according to the user's configured access to databases (`allow_databases`). [Winter Zhang](https://github.com/yandex/ClickHouse/pull/2856) - -### Backward incompatible changes: - -* In requests with JOIN, the star character expands to a list of columns in all tables, in compliance with the SQL standard. You can restore the old behavior by setting `asterisk_left_columns_only` to 1 on the user configuration level. - -### Build changes: - -* Most integration tests can now be run by commit. -* Code style checks can also be run by commit. -* The `memcpy` implementation is chosen correctly when building on CentOS7/Fedora. [Etienne Champetier](https://github.com/yandex/ClickHouse/pull/2912) -* When using clang to build, some warnings from `-Weverything` have been added, in addition to the regular `-Wall-Wextra -Werror`. [#2957](https://github.com/yandex/ClickHouse/pull/2957) -* Debugging the build uses the `jemalloc` debug option. -* The interface of the library for interacting with ZooKeeper is declared abstract. [#2950](https://github.com/yandex/ClickHouse/pull/2950) - -## ClickHouse release 18.10.3, 2018-08-13 - -### New features: - -* HTTPS can be used for replication. [#2760](https://github.com/yandex/ClickHouse/pull/2760) -* Added the functions `murmurHash2_64`, `murmurHash3_32`, `murmurHash3_64`, and `murmurHash3_128` in addition to the existing `murmurHash2_32`. [#2791](https://github.com/yandex/ClickHouse/pull/2791) -* Support for Nullable types in the ClickHouse ODBC driver (`ODBCDriver2` output format). [#2834](https://github.com/yandex/ClickHouse/pull/2834) -* Support for `UUID` in the key columns. - -### Improvements: - -* Clusters can be removed without restarting the server when they are deleted from the config files. [#2777](https://github.com/yandex/ClickHouse/pull/2777) -* External dictionaries can be removed without restarting the server when they are removed from config files. [#2779](https://github.com/yandex/ClickHouse/pull/2779) -* Added `SETTINGS` support for the `Kafka` table engine. [Alexander Marshalov](https://github.com/yandex/ClickHouse/pull/2781) -* Improvements for the `UUID` data type (not yet complete). [#2618](https://github.com/yandex/ClickHouse/pull/2618) -* Support for empty parts after merges in the `SummingMergeTree`, `CollapsingMergeTree` and `VersionedCollapsingMergeTree` engines. [#2815](https://github.com/yandex/ClickHouse/pull/2815) -* Old records of completed mutations are deleted (`ALTER DELETE`). [#2784](https://github.com/yandex/ClickHouse/pull/2784) -* Added the `system.merge_tree_settings` table. [Kirill Shvakov](https://github.com/yandex/ClickHouse/pull/2841) -* The `system.tables` table now has dependency columns: `dependencies_database` and `dependencies_table`. [Winter Zhang](https://github.com/yandex/ClickHouse/pull/2851) -* Added the `max_partition_size_to_drop` config option. 
[#2782](https://github.com/yandex/ClickHouse/pull/2782) -* Added the `output_format_json_escape_forward_slashes` option. [Alexander Bocharov](https://github.com/yandex/ClickHouse/pull/2812) -* Added the `max_fetch_partition_retries_count` setting. [#2831](https://github.com/yandex/ClickHouse/pull/2831) -* Added the `prefer_localhost_replica` setting for disabling the preference for a local replica and going to a local replica without inter-process interaction. [#2832](https://github.com/yandex/ClickHouse/pull/2832) -* The `quantileExact` aggregate function returns `nan` in the case of aggregation on an empty `Float32` or `Float64` set. [Sundy Li](https://github.com/yandex/ClickHouse/pull/2855) - -### Bug fixes: - -* Removed unnecessary escaping of the connection string parameters for ODBC, which made it impossible to establish a connection. This error occurred in version 18.6.0. -* Fixed the logic for processing `REPLACE PARTITION` commands in the replication queue. If there are two `REPLACE` commands for the same partition, the incorrect logic could cause one of them to remain in the replication queue and not be executed. [#2814](https://github.com/yandex/ClickHouse/pull/2814) -* Fixed a merge bug when all data parts were empty (parts that were formed from a merge or from `ALTER DELETE` if all data was deleted). This bug appeared in version 18.1.0. [#2930](https://github.com/yandex/ClickHouse/pull/2930) -* Fixed an error for concurrent `Set` or `Join`. [Amos Bird](https://github.com/yandex/ClickHouse/pull/2823) -* Fixed the `Block structure mismatch in UNION stream: different number of columns` error that occurred for `UNION ALL` queries inside a sub-query if one of the `SELECT` queries contains duplicate column names. [Winter Zhang](https://github.com/yandex/ClickHouse/pull/2094) -* Fixed a memory leak if an exception occurred when connecting to a MySQL server. -* Fixed incorrect clickhouse-client response code in case of a request error. -* Fixed incorrect behavior of materialized views containing DISTINCT. [#2795](https://github.com/yandex/ClickHouse/issues/2795) - -### Backward incompatible changes - -* Removed support for CHECK TABLE queries for Distributed tables. - -### Build changes: - -* The allocator has been replaced: `jemalloc` is now used instead of `tcmalloc`. In some scenarios, this increases speed up to 20%. However, there are queries that have slowed by up to 20%. Memory consumption has been reduced by approximately 10% in some scenarios, with improved stability. With highly competitive loads, CPU usage in userspace and in system shows just a slight increase. [#2773](https://github.com/yandex/ClickHouse/pull/2773) -* Use of libressl from a submodule. [#1983](https://github.com/yandex/ClickHouse/pull/1983) [#2807](https://github.com/yandex/ClickHouse/pull/2807) -* Use of unixodbc from a submodule. [#2789](https://github.com/yandex/ClickHouse/pull/2789) -* Use of mariadb-connector-c from a submodule. [#2785](https://github.com/yandex/ClickHouse/pull/2785) -* Added functional test files to the repository that depend on the availability of test data (for the time being, without the test data itself). - -## ClickHouse release 18.6.0, 2018-08-02 - -### New features: - -* Added support for ON expressions for the JOIN ON syntax: -`JOIN ON Expr([table.]column ...) = Expr([table.]column, ...) [AND Expr([table.]column, ...) = Expr([table.]column, ...) ...]` -The expression must be a chain of equalities joined by the AND operator. 
Each side of the equality can be an arbitrary expression over the columns of one of the tables. The use of fully qualified column names is supported (`table.name`, `database.table.name`, `table_alias.name`, `subquery_alias.name`) for the right table. [#2742](https://github.com/yandex/ClickHouse/pull/2742) -* HTTPS can be enabled for replication. [#2760](https://github.com/yandex/ClickHouse/pull/2760) - -### Improvements: - -* The server passes the patch component of its version to the client. Data about the patch version component is in `system.processes` and `query_log`. [#2646](https://github.com/yandex/ClickHouse/pull/2646) - -## ClickHouse release 18.5.1, 2018-07-31 - -### New features: - -* Added the hash function `murmurHash2_32` [#2756](https://github.com/yandex/ClickHouse/pull/2756). - -### Improvements: - -* Now you can use the `from_env` [#2741](https://github.com/yandex/ClickHouse/pull/2741) attribute to set values in config files from environment variables. -* Added case-insensitive versions of the `coalesce`, `ifNull`, and `nullIf` functions [#2752](https://github.com/yandex/ClickHouse/pull/2752). - -### Bug fixes: - -* Fixed a possible bug when starting a replica [#2759](https://github.com/yandex/ClickHouse/pull/2759). - -## ClickHouse release 18.4.0, 2018-07-28 - -### New features: - -* Added system tables: `formats`, `data_type_families`, `aggregate_function_combinators`, `table_functions`, `table_engines`, `collations` [#2721](https://github.com/yandex/ClickHouse/pull/2721). -* Added the ability to use a table function instead of a table as an argument of a `remote` or `cluster` table function [#2708](https://github.com/yandex/ClickHouse/pull/2708). -* Support for `HTTP Basic` authentication in the replication protocol [#2727](https://github.com/yandex/ClickHouse/pull/2727). -* The `has` function now allows searching for a numeric value in an array of `Enum` values [Maxim Khrisanfov](https://github.com/yandex/ClickHouse/pull/2699). -* Support for adding arbitrary message separators when reading from `Kafka` [Amos Bird](https://github.com/yandex/ClickHouse/pull/2701). - -### Improvements: - -* The `ALTER TABLE t DELETE WHERE` query does not rewrite data parts that were not affected by the WHERE condition [#2694](https://github.com/yandex/ClickHouse/pull/2694). -* The `use_minimalistic_checksums_in_zookeeper` option for `ReplicatedMergeTree` tables is enabled by default. This setting was added in version 1.1.54378, 2018-04-16. Versions that are older than 1.1.54378 can no longer be installed. -* Support for running `KILL` and `OPTIMIZE` queries that specify `ON CLUSTER` [Winter Zhang](https://github.com/yandex/ClickHouse/pull/2689). - -### Bug fixes: - -* Fixed the error `Column ... is not under an aggregate function and not in GROUP BY` for aggregation with an IN expression. This bug appeared in version 18.1.0. ([bbdd780b](https://github.com/yandex/ClickHouse/commit/bbdd780be0be06a0f336775941cdd536878dd2c2)) -* Fixed a bug in the `windowFunnel` aggregate function [Winter Zhang](https://github.com/yandex/ClickHouse/pull/2735). -* Fixed a bug in the `anyHeavy` aggregate function ([a2101df2](https://github.com/yandex/ClickHouse/commit/a2101df25a6a0fba99aa71f8793d762af2b801ee)) -* Fixed server crash when using the `countArray()` aggregate function.
- -### Backward incompatible changes: - -* Parameters for `Kafka` engine was changed from `Kafka(kafka_broker_list, kafka_topic_list, kafka_group_name, kafka_format[, kafka_schema, kafka_num_consumers])` to `Kafka(kafka_broker_list, kafka_topic_list, kafka_group_name, kafka_format[, kafka_row_delimiter, kafka_schema, kafka_num_consumers])`. If your tables use `kafka_schema` or `kafka_num_consumers` parameters, you have to manually edit the metadata files `path/metadata/database/table.sql` and add `kafka_row_delimiter` parameter with `''` value. - -## ClickHouse release 18.1.0, 2018-07-23 - -### New features: - -* Support for the `ALTER TABLE t DELETE WHERE` query for non-replicated MergeTree tables ([#2634](https://github.com/yandex/ClickHouse/pull/2634)). -* Support for arbitrary types for the `uniq*` family of aggregate functions ([#2010](https://github.com/yandex/ClickHouse/issues/2010)). -* Support for arbitrary types in comparison operators ([#2026](https://github.com/yandex/ClickHouse/issues/2026)). -* The `users.xml` file allows setting a subnet mask in the format `10.0.0.1/255.255.255.0`. This is necessary for using masks for IPv6 networks with zeros in the middle ([#2637](https://github.com/yandex/ClickHouse/pull/2637)). -* Added the `arrayDistinct` function ([#2670](https://github.com/yandex/ClickHouse/pull/2670)). -* The SummingMergeTree engine can now work with AggregateFunction type columns ([Constantin S. Pan](https://github.com/yandex/ClickHouse/pull/2566)). - -### Improvements: - -* Changed the numbering scheme for release versions. Now the first part contains the year of release (A.D., Moscow timezone, minus 2000), the second part contains the number for major changes (increases for most releases), and the third part is the patch version. Releases are still backwards compatible, unless otherwise stated in the changelog. -* Faster conversions of floating-point numbers to a string ([Amos Bird](https://github.com/yandex/ClickHouse/pull/2664)). -* If some rows were skipped during an insert due to parsing errors (this is possible with the `input_allow_errors_num` and `input_allow_errors_ratio` settings enabled), the number of skipped rows is now written to the server log ([Leonardo Cecchi](https://github.com/yandex/ClickHouse/pull/2669)). - -### Bug fixes: - -* Fixed the TRUNCATE command for temporary tables ([Amos Bird](https://github.com/yandex/ClickHouse/pull/2624)). -* Fixed a rare deadlock in the ZooKeeper client library that occurred when there was a network error while reading the response ([c315200](https://github.com/yandex/ClickHouse/commit/c315200e64b87e44bdf740707fc857d1fdf7e947)). -* Fixed an error during a CAST to Nullable types ([#1322](https://github.com/yandex/ClickHouse/issues/1322)). -* Fixed the incorrect result of the `maxIntersection()` function when the boundaries of intervals coincided ([Michael Furmur](https://github.com/yandex/ClickHouse/pull/2657)). -* Fixed incorrect transformation of the OR expression chain in a function argument ([chenxing-xc](https://github.com/yandex/ClickHouse/pull/2663)). -* Fixed performance degradation for queries containing `IN (subquery)` expressions inside another subquery ([#2571](https://github.com/yandex/ClickHouse/issues/2571)). -* Fixed incompatibility between servers with different versions in distributed queries that use a `CAST` function that isn't in uppercase letters ([fe8c4d6](https://github.com/yandex/ClickHouse/commit/fe8c4d64e434cacd4ceef34faa9005129f2190a5)). 
-* Added missing quoting of identifiers for queries to an external DBMS ([#2635](https://github.com/yandex/ClickHouse/issues/2635)). - -### Backward incompatible changes: - -* Converting a string containing the number zero to DateTime does not work. Example: `SELECT toDateTime('0')`. This is also the reason that `DateTime DEFAULT '0'` does not work in tables, as well as `0` in dictionaries. Solution: replace `0` with `0000-00-00 00:00:00`. - -## ClickHouse release 1.1.54394, 2018-07-12 - -### New features: - -* Added the `histogram` aggregate function ([Mikhail Surin](https://github.com/yandex/ClickHouse/pull/2521)). -* Now `OPTIMIZE TABLE ... FINAL` can be used without specifying partitions for `ReplicatedMergeTree` ([Amos Bird](https://github.com/yandex/ClickHouse/pull/2600)). - -### Bug fixes: - -* Fixed a problem with a very small timeout for sockets (one second) for reading and writing when sending and downloading replicated data, which made it impossible to download larger parts if there is a load on the network or disk (it resulted in cyclical attempts to download parts). This error occurred in version 1.1.54388. -* Fixed issues when using chroot in ZooKeeper if you inserted duplicate data blocks in the table. -* The `has` function now works correctly for an array with Nullable elements ([#2115](https://github.com/yandex/ClickHouse/issues/2115)). -* The `system.tables` table now works correctly when used in distributed queries. The `metadata_modification_time` and `engine_full` columns are now non-virtual. Fixed an error that occurred if only these columns were requested from the table. -* Fixed how an empty `TinyLog` table works after inserting an empty data block ([#2563](https://github.com/yandex/ClickHouse/issues/2563)). -* The `system.zookeeper` table works if the value of the node in ZooKeeper is NULL. - -## ClickHouse release 1.1.54390, 2018-07-06 - -### New features: - -* Queries can be sent in `multipart/form-data` format (in the `query` field), which is useful if external data is also sent for query processing ([Olga Hvostikova](https://github.com/yandex/ClickHouse/pull/2490)). -* Added the ability to enable or disable processing single or double quotes when reading data in CSV format. You can configure this in the `format_csv_allow_single_quotes` and `format_csv_allow_double_quotes` settings ([Amos Bird](https://github.com/yandex/ClickHouse/pull/2574)). -* Now `OPTIMIZE TABLE ... FINAL` can be used without specifying the partition for non-replicated variants of `MergeTree` ([Amos Bird](https://github.com/yandex/ClickHouse/pull/2599)). - -### Improvements: - -* Improved performance, reduced memory consumption, and correct memory consumption tracking with use of the IN operator when a table index could be used ([#2584](https://github.com/yandex/ClickHouse/pull/2584)). -* Removed redundant checking of checksums when adding a data part. This is important when there are a large number of replicas, because in these cases the total number of checks was equal to N^2. -* Added support for `Array(Tuple(...))` arguments for the `arrayEnumerateUniq` function ([#2573](https://github.com/yandex/ClickHouse/pull/2573)). -* Added `Nullable` support for the `runningDifference` function ([#2594](https://github.com/yandex/ClickHouse/pull/2594)). -* Improved query analysis performance when there is a very large number of expressions ([#2572](https://github.com/yandex/ClickHouse/pull/2572)). -* Faster selection of data parts for merging in `ReplicatedMergeTree` tables. 
Faster recovery of the ZooKeeper session ([#2597](https://github.com/yandex/ClickHouse/pull/2597)). -* The `format_version.txt` file for `MergeTree` tables is re-created if it is missing, which makes sense if ClickHouse is launched after copying the directory structure without files ([Ciprian Hacman](https://github.com/yandex/ClickHouse/pull/2593)). - -### Bug fixes: - -* Fixed a bug when working with ZooKeeper that could make it impossible to recover the session and readonly states of tables before restarting the server. -* Fixed a bug when working with ZooKeeper that could result in old nodes not being deleted if the session is interrupted. -* Fixed an error in the `quantileTDigest` function for Float arguments (this bug was introduced in version 1.1.54388) ([Mikhail Surin](https://github.com/yandex/ClickHouse/pull/2553)). -* Fixed a bug in the index for MergeTree tables if the primary key column is located inside the function for converting types between signed and unsigned integers of the same size ([#2603](https://github.com/yandex/ClickHouse/pull/2603)). -* Fixed segfault if `macros` are used but they aren't in the config file ([#2570](https://github.com/yandex/ClickHouse/pull/2570)). -* Fixed switching to the default database when reconnecting the client ([#2583](https://github.com/yandex/ClickHouse/pull/2583)). -* Fixed a bug that occurred when the `use_index_for_in_with_subqueries` setting was disabled. - -### Security fix: - -* Sending files is no longer possible when connected to MySQL (`LOAD DATA LOCAL INFILE`). - -## ClickHouse release 1.1.54388, 2018-06-28 - -### New features: - -* Support for the `ALTER TABLE t DELETE WHERE` query for replicated tables. Added the `system.mutations` table to track progress of this type of queries. -* Support for the `ALTER TABLE t [REPLACE|ATTACH] PARTITION` query for \*MergeTree tables. -* Support for the `TRUNCATE TABLE` query ([Winter Zhang](https://github.com/yandex/ClickHouse/pull/2260)) -* Several new `SYSTEM` queries for replicated tables (`RESTART REPLICAS`, `SYNC REPLICA`, `[STOP|START] [MERGES|FETCHES|SENDS REPLICATED|REPLICATION QUEUES]`). -* Added the ability to write to a table with the MySQL engine and the corresponding table function ([sundy-li](https://github.com/yandex/ClickHouse/pull/2294)). -* Added the `url()` table function and the `URL` table engine ([Alexander Sapin](https://github.com/yandex/ClickHouse/pull/2501)). -* Added the `windowFunnel` aggregate function ([sundy-li](https://github.com/yandex/ClickHouse/pull/2352)). -* New `startsWith` and `endsWith` functions for strings ([Vadim Plakhtinsky](https://github.com/yandex/ClickHouse/pull/2429)). -* The `numbers()` table function now allows you to specify the offset ([Winter Zhang](https://github.com/yandex/ClickHouse/pull/2535)). -* The password to `clickhouse-client` can be entered interactively. -* Server logs can now be sent to syslog ([Alexander Krasheninnikov](https://github.com/yandex/ClickHouse/pull/2459)). -* Support for logging in dictionaries with a shared library source ([Alexander Sapin](https://github.com/yandex/ClickHouse/pull/2472)). -* Support for custom CSV delimiters ([Ivan Zhukov](https://github.com/yandex/ClickHouse/pull/2263)) -* Added the `date_time_input_format` setting. If you switch this setting to `'best_effort'`, DateTime values will be read in a wide range of formats. -* Added the `clickhouse-obfuscator` utility for data obfuscation. Usage example: publishing data used in performance tests. 
- -### Experimental features: - -* Added the ability to calculate `and` arguments only where they are needed ([Anastasia Tsarkova](https://github.com/yandex/ClickHouse/pull/2272)) -* JIT compilation to native code is now available for some expressions ([pyos](https://github.com/yandex/ClickHouse/pull/2277)). - -### Bug fixes: - -* Duplicates no longer appear for a query with `DISTINCT` and `ORDER BY`. -* Queries with `ARRAY JOIN` and `arrayFilter` no longer return an incorrect result. -* Fixed an error when reading an array column from a Nested structure ([#2066](https://github.com/yandex/ClickHouse/issues/2066)). -* Fixed an error when analyzing queries with a HAVING clause like `HAVING tuple IN (...)`. -* Fixed an error when analyzing queries with recursive aliases. -* Fixed an error when reading from ReplacingMergeTree with a condition in PREWHERE that filters all rows ([#2525](https://github.com/yandex/ClickHouse/issues/2525)). -* User profile settings were not applied when using sessions in the HTTP interface. -* Fixed how settings are applied from the command line parameters in clickhouse-local. -* The ZooKeeper client library now uses the session timeout received from the server. -* Fixed a bug in the ZooKeeper client library when the client waited for the server response longer than the timeout. -* Fixed pruning of parts for queries with conditions on partition key columns ([#2342](https://github.com/yandex/ClickHouse/issues/2342)). -* Merges are now possible after `CLEAR COLUMN IN PARTITION` ([#2315](https://github.com/yandex/ClickHouse/issues/2315)). -* Type mapping in the ODBC table function has been fixed ([sundy-li](https://github.com/yandex/ClickHouse/pull/2268)). -* Type comparisons have been fixed for `DateTime` with and without the time zone ([Alexander Bocharov](https://github.com/yandex/ClickHouse/pull/2400)). -* Fixed syntactic parsing and formatting of the `CAST` operator. -* Fixed insertion into a materialized view for the Distributed table engine ([Babacar Diassé](https://github.com/yandex/ClickHouse/pull/2411)). -* Fixed a race condition when writing data from the `Kafka` engine to materialized views ([Yangkuan Liu](https://github.com/yandex/ClickHouse/pull/2448)). -* Fixed SSRF in the remote() table function. -* Fixed exit behavior of `clickhouse-client` in multiline mode ([#2510](https://github.com/yandex/ClickHouse/issues/2510)). - -### Improvements: - -* Background tasks in replicated tables are now performed in a thread pool instead of in separate threads ([Silviu Caragea](https://github.com/yandex/ClickHouse/pull/1722)). -* Improved LZ4 compression performance. -* Faster analysis for queries with a large number of JOINs and sub-queries. -* The DNS cache is now updated automatically when there are too many network errors. -* Table inserts no longer occur if the insert into one of the materialized views is not possible because it has too many parts. -* Corrected the discrepancy in the event counters `Query`, `SelectQuery`, and `InsertQuery`. -* Expressions like `tuple IN (SELECT tuple)` are allowed if the tuple types match. -* A server with replicated tables can start even if you haven't configured ZooKeeper. -* When calculating the number of available CPU cores, limits on cgroups are now taken into account ([Atri Sharma](https://github.com/yandex/ClickHouse/pull/2325)). -* Added chown for config directories in the systemd config file ([Mikhail Shiryaev](https://github.com/yandex/ClickHouse/pull/2421)). 
-
-### Build changes:
-
-* The gcc8 compiler can be used for builds.
-* Added the ability to build llvm from submodule.
-* The version of the librdkafka library has been updated to v0.11.4.
-* Added the ability to use the system libcpuid library. The library version has been updated to 0.4.0.
-* Fixed the build using the vectorclass library ([Babacar Diassé](https://github.com/yandex/ClickHouse/pull/2274)).
-* CMake now generates files for ninja by default (like when using `-G Ninja`).
-* Added the ability to use the libtinfo library instead of libtermcap ([Georgy Kondratiev](https://github.com/yandex/ClickHouse/pull/2519)).
-* Fixed a header file conflict in Fedora Rawhide ([#2520](https://github.com/yandex/ClickHouse/issues/2520)).
-
-### Backward incompatible changes:
-
-* Removed escaping in `Vertical` and `Pretty*` formats and deleted the `VerticalRaw` format.
-* If servers with version 1.1.54388 (or newer) and servers with an older version are used simultaneously in a distributed query and the query has the `cast(x, 'Type')` expression without the `AS` keyword and doesn't have the word `cast` in uppercase, an exception will be thrown with a message like `Not found column cast(0, 'UInt8') in block`. Solution: Update the server on the entire cluster.
-
-## ClickHouse release 1.1.54385, 2018-06-01
-
-### Bug fixes:
-
-* Fixed an error that in some cases caused ZooKeeper operations to block.
-
-## ClickHouse release 1.1.54383, 2018-05-22
-
-### Bug fixes:
-
-* Fixed a slowdown of the replication queue if a table has many replicas.
-
-## ClickHouse release 1.1.54381, 2018-05-14
-
-### Bug fixes:
-
-* Fixed a leak of nodes in ZooKeeper when ClickHouse loses the connection to the ZooKeeper server.
-
-## ClickHouse release 1.1.54380, 2018-04-21
-
-### New features:
-
-* Added the table function `file(path, format, structure)`. An example of reading bytes from `/dev/urandom`: `ln -s /dev/urandom /var/lib/clickhouse/user_files/random` and then `clickhouse-client -q "SELECT * FROM file('random', 'RowBinary', 'd UInt8') LIMIT 10"`.
-
-### Improvements:
-
-* Subqueries can be wrapped in `()` brackets to enhance query readability. For example: `(SELECT 1) UNION ALL (SELECT 1)`.
-* Simple `SELECT` queries from the `system.processes` table are not included in the `max_concurrent_queries` limit.
-
-### Bug fixes:
-
-* Fixed incorrect behavior of the `IN` operator when selecting from a `MATERIALIZED VIEW`.
-* Fixed incorrect filtering by partition index in expressions like `partition_key_column IN (...)`.
-* Fixed inability to execute an `OPTIMIZE` query on a non-leader replica if `RENAME` was performed on the table.
-* Fixed the authorization error when executing `OPTIMIZE` or `ALTER` queries on a non-leader replica.
-* Fixed freezing of `KILL QUERY`.
-* Fixed an error in the ZooKeeper client library which led to loss of watches, freezing of the distributed DDL queue, and slowdowns in the replication queue if a non-empty `chroot` prefix is used in the ZooKeeper configuration.
-
-### Backward incompatible changes:
-
-* Removed support for expressions like `(a, b) IN (SELECT (a, b))` (you can use the equivalent expression `(a, b) IN (SELECT a, b)`). In previous releases, these expressions led to undetermined `WHERE` filtering or caused errors.
-
-## ClickHouse release 1.1.54378, 2018-04-16
-
-### New features:
-
-* Logging level can be changed without restarting the server.
-* Added the `SHOW CREATE DATABASE` query.
-* The `query_id` can be passed to `clickhouse-client` (elBroom).
-* New setting: `max_network_bandwidth_for_all_users`.
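A sketch of the rewrite implied by the backward incompatible change above, using a hypothetical table `t` and subquery source `t2`:

```sql
-- No longer supported after 1.1.54380:
--   SELECT * FROM t WHERE (a, b) IN (SELECT (a, b) FROM t2);
-- Equivalent supported form:
SELECT * FROM t WHERE (a, b) IN (SELECT a, b FROM t2);
```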
-* Added support for `ALTER TABLE ... PARTITION ... ` for `MATERIALIZED VIEW`. -* Added information about the size of data parts in uncompressed form in the system table. -* Server-to-server encryption support for distributed tables (`1` in the replica config in ``). -* Configuration of the table level for the `ReplicatedMergeTree` family in order to minimize the amount of data stored in Zookeeper: : `use_minimalistic_checksums_in_zookeeper = 1` -* Configuration of the `clickhouse-client` prompt. By default, server names are now output to the prompt. The server's display name can be changed. It's also sent in the `X-ClickHouse-Display-Name` HTTP header (Kirill Shvakov). -* Multiple comma-separated `topics` can be specified for the `Kafka` engine (Tobias Adamson) -* When a query is stopped by `KILL QUERY` or `replace_running_query`, the client receives the `Query was cancelled` exception instead of an incomplete result. - -### Improvements: - -* `ALTER TABLE ... DROP/DETACH PARTITION` queries are run at the front of the replication queue. -* `SELECT ... FINAL` and `OPTIMIZE ... FINAL` can be used even when the table has a single data part. -* A `query_log` table is recreated on the fly if it was deleted manually (Kirill Shvakov). -* The `lengthUTF8` function runs faster (zhang2014). -* Improved performance of synchronous inserts in `Distributed` tables (`insert_distributed_sync = 1`) when there is a very large number of shards. -* The server accepts the `send_timeout` and `receive_timeout` settings from the client and applies them when connecting to the client (they are applied in reverse order: the server socket's `send_timeout` is set to the `receive_timeout` value received from the client, and vice versa). -* More robust crash recovery for asynchronous insertion into `Distributed` tables. -* The return type of the `countEqual` function changed from `UInt32` to `UInt64` (谢磊). - -### Bug fixes: - -* Fixed an error with `IN` when the left side of the expression is `Nullable`. -* Correct results are now returned when using tuples with `IN` when some of the tuple components are in the table index. -* The `max_execution_time` limit now works correctly with distributed queries. -* Fixed errors when calculating the size of composite columns in the `system.columns` table. -* Fixed an error when creating a temporary table `CREATE TEMPORARY TABLE IF NOT EXISTS.` -* Fixed errors in `StorageKafka` (##2075) -* Fixed server crashes from invalid arguments of certain aggregate functions. -* Fixed the error that prevented the `DETACH DATABASE` query from stopping background tasks for `ReplicatedMergeTree` tables. -* `Too many parts` state is less likely to happen when inserting into aggregated materialized views (##2084). -* Corrected recursive handling of substitutions in the config if a substitution must be followed by another substitution on the same level. -* Corrected the syntax in the metadata file when creating a `VIEW` that uses a query with `UNION ALL`. -* `SummingMergeTree` now works correctly for summation of nested data structures with a composite key. -* Fixed the possibility of a race condition when choosing the leader for `ReplicatedMergeTree` tables. - -### Build changes: - -* The build supports `ninja` instead of `make` and uses `ninja` by default for building releases. -* Renamed packages: `clickhouse-server-base` in `clickhouse-common-static`; `clickhouse-server-common` in `clickhouse-server`; `clickhouse-common-dbg` in `clickhouse-common-static-dbg`. 
To install, use `clickhouse-server clickhouse-client`. Packages with the old names will still load in the repositories for backward compatibility. - -### Backward incompatible changes: - -* Removed the special interpretation of an IN expression if an array is specified on the left side. Previously, the expression `arr IN (set)` was interpreted as "at least one `arr` element belongs to the `set`". To get the same behavior in the new version, write `arrayExists(x -> x IN (set), arr)`. -* Disabled the incorrect use of the socket option `SO_REUSEPORT`, which was incorrectly enabled by default in the Poco library. Note that on Linux there is no longer any reason to simultaneously specify the addresses `::` and `0.0.0.0` for listen – use just `::`, which allows listening to the connection both over IPv4 and IPv6 (with the default kernel config settings). You can also revert to the behavior from previous versions by specifying `1` in the config. - -## ClickHouse release 1.1.54370, 2018-03-16 - -### New features: - -* Added the `system.macros` table and auto updating of macros when the config file is changed. -* Added the `SYSTEM RELOAD CONFIG` query. -* Added the `maxIntersections(left_col, right_col)` aggregate function, which returns the maximum number of simultaneously intersecting intervals `[left; right]`. The `maxIntersectionsPosition(left, right)` function returns the beginning of the "maximum" interval. ([Michael Furmur](https://github.com/yandex/ClickHouse/pull/2012)). - -### Improvements: - -* When inserting data in a `Replicated` table, fewer requests are made to `ZooKeeper` (and most of the user-level errors have disappeared from the `ZooKeeper` log). -* Added the ability to create aliases for data sets. Example: `WITH (1, 2, 3) AS set SELECT number IN set FROM system.numbers LIMIT 10`. - -### Bug fixes: - -* Fixed the `Illegal PREWHERE` error when reading from Merge tables for `Distributed`tables. -* Added fixes that allow you to start clickhouse-server in IPv4-only Docker containers. -* Fixed a race condition when reading from system `system.parts_columns tables.` -* Removed double buffering during a synchronous insert to a `Distributed` table, which could have caused the connection to timeout. -* Fixed a bug that caused excessively long waits for an unavailable replica before beginning a `SELECT` query. -* Fixed incorrect dates in the `system.parts` table. -* Fixed a bug that made it impossible to insert data in a `Replicated` table if `chroot` was non-empty in the configuration of the `ZooKeeper` cluster. -* Fixed the vertical merging algorithm for an empty `ORDER BY` table. -* Restored the ability to use dictionaries in queries to remote tables, even if these dictionaries are not present on the requestor server. This functionality was lost in release 1.1.54362. -* Restored the behavior for queries like `SELECT * FROM remote('server2', default.table) WHERE col IN (SELECT col2 FROM default.table)` when the right side of the `IN` should use a remote `default.table` instead of a local one. This behavior was broken in version 1.1.54358. -* Removed extraneous error-level logging of `Not found column ... in block`. - -## Clickhouse Release 1.1.54362, 2018-03-11 - -### New features: - -* Aggregation without `GROUP BY` for an empty set (such as `SELECT count(*) FROM table WHERE 0`) now returns a result with one row with null values for aggregate functions, in compliance with the SQL standard. 
To restore the old behavior (return an empty result), set `empty_result_for_aggregation_by_empty_set` to 1. -* Added type conversion for `UNION ALL`. Different alias names are allowed in `SELECT` positions in `UNION ALL`, in compliance with the SQL standard. -* Arbitrary expressions are supported in `LIMIT BY` clauses. Previously, it was only possible to use columns resulting from `SELECT`. -* An index of `MergeTree` tables is used when `IN` is applied to a tuple of expressions from the columns of the primary key. Example: `WHERE (UserID, EventDate) IN ((123, '2000-01-01'), ...)` (Anastasiya Tsarkova). -* Added the `clickhouse-copier` tool for copying between clusters and resharding data (beta). -* Added consistent hashing functions: `yandexConsistentHash`, `jumpConsistentHash`, `sumburConsistentHash`. They can be used as a sharding key in order to reduce the amount of network traffic during subsequent reshardings. -* Added functions: `arrayAny`, `arrayAll`, `hasAny`, `hasAll`, `arrayIntersect`, `arrayResize`. -* Added the `arrayCumSum` function (Javi Santana). -* Added the `parseDateTimeBestEffort`, `parseDateTimeBestEffortOrZero`, and `parseDateTimeBestEffortOrNull` functions to read the DateTime from a string containing text in a wide variety of possible formats. -* Data can be partially reloaded from external dictionaries during updating (load just the records in which the value of the specified field greater than in the previous download) (Arsen Hakobyan). -* Added the `cluster` table function. Example: `cluster(cluster_name, db, table)`. The `remote` table function can accept the cluster name as the first argument, if it is specified as an identifier. -* The `remote` and `cluster` table functions can be used in `INSERT` requests. -* Added the `create_table_query` and `engine_full` virtual columns to the `system.tables`table . The `metadata_modification_time` column is virtual. -* Added the `data_path` and `metadata_path` columns to `system.tables`and` system.databases` tables, and added the `path` column to the `system.parts` and `system.parts_columns` tables. -* Added additional information about merges in the `system.part_log` table. -* An arbitrary partitioning key can be used for the `system.query_log` table (Kirill Shvakov). -* The `SHOW TABLES` query now also shows temporary tables. Added temporary tables and the `is_temporary` column to `system.tables` (zhang2014). -* Added `DROP TEMPORARY TABLE` and `EXISTS TEMPORARY TABLE` queries (zhang2014). -* Support for `SHOW CREATE TABLE` for temporary tables (zhang2014). -* Added the `system_profile` configuration parameter for the settings used by internal processes. -* Support for loading `object_id` as an attribute in `MongoDB` dictionaries (Pavel Litvinenko). -* Reading `null` as the default value when loading data for an external dictionary with the `MongoDB` source (Pavel Litvinenko). -* Reading `DateTime` values in the `Values` format from a Unix timestamp without single quotes. -* Failover is supported in `remote` table functions for cases when some of the replicas are missing the requested table. -* Configuration settings can be overridden in the command line when you run `clickhouse-server`. Example: `clickhouse-server -- --logger.level=information`. -* Implemented the `empty` function from a `FixedString` argument: the function returns 1 if the string consists entirely of null bytes (zhang2014). 
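A self-contained sketch of the `parseDateTimeBestEffort*` family described above (the input strings are illustrative):

```sql
SELECT
    parseDateTimeBestEffort('2018-03-11 12:34:56') AS iso_like,
    parseDateTimeBestEffort('1520771696')          AS unix_ts_string,
    parseDateTimeBestEffortOrNull('not a date')    AS bad_as_null,   -- NULL instead of an exception
    parseDateTimeBestEffortOrZero('not a date')    AS bad_as_zero;   -- zero date instead of an exception
```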
-* Added the `listen_try`configuration parameter for listening to at least one of the listen addresses without quitting, if some of the addresses can't be listened to (useful for systems with disabled support for IPv4 or IPv6). -* Added the `VersionedCollapsingMergeTree` table engine. -* Support for rows and arbitrary numeric types for the `library` dictionary source. -* `MergeTree` tables can be used without a primary key (you need to specify `ORDER BY tuple()`). -* A `Nullable` type can be `CAST` to a non-`Nullable` type if the argument is not `NULL`. -* `RENAME TABLE` can be performed for `VIEW`. -* Added the `throwIf` function. -* Added the `odbc_default_field_size` option, which allows you to extend the maximum size of the value loaded from an ODBC source (by default, it is 1024). -* The `system.processes` table and `SHOW PROCESSLIST` now have the `is_cancelled` and `peak_memory_usage` columns. - -### Improvements: - -* Limits and quotas on the result are no longer applied to intermediate data for `INSERT SELECT` queries or for `SELECT` subqueries. -* Fewer false triggers of `force_restore_data` when checking the status of `Replicated` tables when the server starts. -* Added the `allow_distributed_ddl` option. -* Nondeterministic functions are not allowed in expressions for `MergeTree` table keys. -* Files with substitutions from `config.d` directories are loaded in alphabetical order. -* Improved performance of the `arrayElement` function in the case of a constant multidimensional array with an empty array as one of the elements. Example: `[[1], []][x]`. -* The server starts faster now when using configuration files with very large substitutions (for instance, very large lists of IP networks). -* When running a query, table valued functions run once. Previously, `remote` and `mysql` table valued functions performed the same query twice to retrieve the table structure from a remote server. -* The `MkDocs` documentation generator is used. -* When you try to delete a table column that `DEFAULT`/`MATERIALIZED` expressions of other columns depend on, an exception is thrown (zhang2014). -* Added the ability to parse an empty line in text formats as the number 0 for `Float` data types. This feature was previously available but was lost in release 1.1.54342. -* `Enum` values can be used in `min`, `max`, `sum` and some other functions. In these cases, it uses the corresponding numeric values. This feature was previously available but was lost in the release 1.1.54337. -* Added `max_expanded_ast_elements` to restrict the size of the AST after recursively expanding aliases. - -### Bug fixes: - -* Fixed cases when unnecessary columns were removed from subqueries in error, or not removed from subqueries containing `UNION ALL`. -* Fixed a bug in merges for `ReplacingMergeTree` tables. -* Fixed synchronous insertions in `Distributed` tables (`insert_distributed_sync = 1`). -* Fixed segfault for certain uses of `FULL` and `RIGHT JOIN` with duplicate columns in subqueries. -* Fixed segfault for certain uses of `replace_running_query` and `KILL QUERY`. -* Fixed the order of the `source` and `last_exception` columns in the `system.dictionaries` table. -* Fixed a bug when the `DROP DATABASE` query did not delete the file with metadata. -* Fixed the `DROP DATABASE` query for `Dictionary` databases. -* Fixed the low precision of `uniqHLL12` and `uniqCombined` functions for cardinalities greater than 100 million items (Alex Bocharov). 
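A minimal sketch of a `MergeTree` table without a primary key, per the `ORDER BY tuple()` note above (table and column names are hypothetical):

```sql
CREATE TABLE events_raw
(
    ts      DateTime,
    message String
)
ENGINE = MergeTree
ORDER BY tuple();   -- an empty sort key stands in for the primary key
```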
-* Fixed the calculation of implicit default values when necessary to simultaneously calculate default explicit expressions in `INSERT` queries (zhang2014). -* Fixed a rare case when a query to a `MergeTree` table couldn't finish (chenxing-xc). -* Fixed a crash that occurred when running a `CHECK` query for `Distributed` tables if all shards are local (chenxing.xc). -* Fixed a slight performance regression with functions that use regular expressions. -* Fixed a performance regression when creating multidimensional arrays from complex expressions. -* Fixed a bug that could cause an extra `FORMAT` section to appear in an `.sql` file with metadata. -* Fixed a bug that caused the `max_table_size_to_drop` limit to apply when trying to delete a `MATERIALIZED VIEW` looking at an explicitly specified table. -* Fixed incompatibility with old clients (old clients were sometimes sent data with the `DateTime('timezone')` type, which they do not understand). -* Fixed a bug when reading `Nested` column elements of structures that were added using `ALTER` but that are empty for the old partitions, when the conditions for these columns moved to `PREWHERE`. -* Fixed a bug when filtering tables by virtual `_table` columns in queries to `Merge` tables. -* Fixed a bug when using `ALIAS` columns in `Distributed` tables. -* Fixed a bug that made dynamic compilation impossible for queries with aggregate functions from the `quantile` family. -* Fixed a race condition in the query execution pipeline that occurred in very rare cases when using `Merge` tables with a large number of tables, and when using `GLOBAL` subqueries. -* Fixed a crash when passing arrays of different sizes to an `arrayReduce` function when using aggregate functions from multiple arguments. -* Prohibited the use of queries with `UNION ALL` in a `MATERIALIZED VIEW`. -* Fixed an error during initialization of the `part_log` system table when the server starts (by default, `part_log` is disabled). - -### Backward incompatible changes: - -* Removed the `distributed_ddl_allow_replicated_alter` option. This behavior is enabled by default. -* Removed the `strict_insert_defaults` setting. If you were using this functionality, write to `clickhouse-feedback@yandex-team.com`. -* Removed the `UnsortedMergeTree` engine. - -## Clickhouse Release 1.1.54343, 2018-02-05 - -* Added macros support for defining cluster names in distributed DDL queries and constructors of Distributed tables: `CREATE TABLE distr ON CLUSTER '{cluster}' (...) ENGINE = Distributed('{cluster}', 'db', 'table')`. -* Now queries like `SELECT ... FROM table WHERE expr IN (subquery)` are processed using the `table` index. -* Improved processing of duplicates when inserting to Replicated tables, so they no longer slow down execution of the replication queue. - -## Clickhouse Release 1.1.54342, 2018-01-22 - -This release contains bug fixes for the previous release 1.1.54337: - -* Fixed a regression in 1.1.54337: if the default user has readonly access, then the server refuses to start up with the message `Cannot create database in readonly mode`. -* Fixed a regression in 1.1.54337: on systems with systemd, logs are always written to syslog regardless of the configuration; the watchdog script still uses init.d. -* Fixed a regression in 1.1.54337: wrong default configuration in the Docker image. -* Fixed nondeterministic behavior of GraphiteMergeTree (you can see it in log messages `Data after merge is not byte-identical to the data on another replicas`). 
-* Fixed a bug that may lead to inconsistent merges after OPTIMIZE query to Replicated tables (you may see it in log messages `Part ... intersects the previous part`). -* Buffer tables now work correctly when MATERIALIZED columns are present in the destination table (by zhang2014). -* Fixed a bug in implementation of NULL. - -## Clickhouse Release 1.1.54337, 2018-01-18 - -### New features: - -* Added support for storage of multi-dimensional arrays and tuples (`Tuple` data type) in tables. -* Support for table functions for `DESCRIBE` and `INSERT` queries. Added support for subqueries in `DESCRIBE`. Examples: `DESC TABLE remote('host', default.hits)`; `DESC TABLE (SELECT 1)`; `INSERT INTO TABLE FUNCTION remote('host', default.hits)`. Support for `INSERT INTO TABLE` in addition to `INSERT INTO`. -* Improved support for time zones. The `DateTime` data type can be annotated with the timezone that is used for parsing and formatting in text formats. Example: `DateTime('Europe/Moscow')`. When timezones are specified in functions for `DateTime` arguments, the return type will track the timezone, and the value will be displayed as expected. -* Added the functions `toTimeZone`, `timeDiff`, `toQuarter`, `toRelativeQuarterNum`. The `toRelativeHour`/`Minute`/`Second` functions can take a value of type `Date` as an argument. The `now` function name is case-sensitive. -* Added the `toStartOfFifteenMinutes` function (Kirill Shvakov). -* Added the `clickhouse format` tool for formatting queries. -* Added the `format_schema_path` configuration parameter (Marek Vavruşa). It is used for specifying a schema in `Cap'n Proto` format. Schema files can be located only in the specified directory. -* Added support for config substitutions (`incl` and `conf.d`) for configuration of external dictionaries and models (Pavel Yakunin). -* Added a column with documentation for the `system.settings` table (Kirill Shvakov). -* Added the `system.parts_columns` table with information about column sizes in each data part of `MergeTree` tables. -* Added the `system.models` table with information about loaded `CatBoost` machine learning models. -* Added the `mysql` and `odbc` table function and corresponding `MySQL` and `ODBC` table engines for accessing remote databases. This functionality is in the beta stage. -* Added the possibility to pass an argument of type `AggregateFunction` for the `groupArray` aggregate function (so you can create an array of states of some aggregate function). -* Removed restrictions on various combinations of aggregate function combinators. For example, you can use `avgForEachIf` as well as `avgIfForEach` aggregate functions, which have different behaviors. -* The `-ForEach` aggregate function combinator is extended for the case of aggregate functions of multiple arguments. -* Added support for aggregate functions of `Nullable` arguments even for cases when the function returns a non-`Nullable` result (added with the contribution of Silviu Caragea). Example: `groupArray`, `groupUniqArray`, `topK`. -* Added the `max_client_network_bandwidth` for `clickhouse-client` (Kirill Shvakov). -* Users with the ` readonly = 2` setting are allowed to work with TEMPORARY tables (CREATE, DROP, INSERT...) (Kirill Shvakov). -* Added support for using multiple consumers with the `Kafka` engine. Extended configuration options for `Kafka` (Marek Vavruša). -* Added the `intExp3` and `intExp4` functions. -* Added the `sumKahan` aggregate function. 
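A short sketch of the improved time zone support described above (the table, zone, and values are illustrative):

```sql
CREATE TABLE tz_demo (t DateTime('Europe/Moscow')) ENGINE = Memory;
INSERT INTO tz_demo VALUES ('2018-01-18 12:00:00');

-- The column keeps its zone; toTimeZone re-interprets the same moment in another zone.
SELECT t, toTimeZone(t, 'UTC') AS t_utc FROM tz_demo;
```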
-* Added the `to*Number*OrNull` functions, where `*Number*` is a numeric type.
-* Added support for `WITH` clauses for an `INSERT SELECT` query (author: zhang2014).
-* Added settings: `http_connection_timeout`, `http_send_timeout`, `http_receive_timeout`. In particular, these settings are used for downloading data parts for replication. Changing these settings allows for faster failover if the network is overloaded.
-* Added support for `ALTER` for tables of type `Null` (Anastasiya Tsarkova).
-* The `reinterpretAsString` function is extended for all data types that are stored contiguously in memory.
-* Added the `--silent` option for the `clickhouse-local` tool. It suppresses printing query execution info to stderr.
-* Added support for reading values of type `Date` from text in a format where the month and/or day of the month is specified using a single digit instead of two digits (Amos Bird).
-
-### Performance optimizations:
-
-* Improved performance of the aggregate functions `min`, `max`, `any`, `anyLast`, `anyHeavy`, `argMin`, and `argMax` for string arguments.
-* Improved performance of the functions `isInfinite`, `isFinite`, `isNaN`, and `roundToExp2`.
-* Improved performance of parsing and formatting `Date` and `DateTime` type values in text format.
-* Improved performance and precision of parsing floating point numbers.
-* Lowered memory usage for `JOIN` in the case when the left and right parts have columns with identical names that are not contained in `USING`.
-* Improved performance of the aggregate functions `varSamp`, `varPop`, `stddevSamp`, `stddevPop`, `covarSamp`, `covarPop`, and `corr` at the cost of reduced computational stability. The old functions are available under the names `varSampStable`, `varPopStable`, `stddevSampStable`, `stddevPopStable`, `covarSampStable`, `covarPopStable`, and `corrStable`.
-
-### Bug fixes:
-
-* Fixed data deduplication after running a `DROP` or `DETACH PARTITION` query. In the previous version, dropping a partition and inserting the same data again was not working because inserted blocks were considered duplicates.
-* Fixed a bug that could lead to incorrect interpretation of the `WHERE` clause for `CREATE MATERIALIZED VIEW` queries with `POPULATE`.
-* Fixed a bug in using the `root_path` parameter in the `zookeeper_servers` configuration.
-* Fixed unexpected results of passing the `Date` argument to `toStartOfDay`.
-* Fixed the `addMonths` and `subtractMonths` functions and the arithmetic for `INTERVAL n MONTH` in cases when the result has the previous year.
-* Added missing support for the `UUID` data type for `DISTINCT`, `JOIN`, and `uniq` aggregate functions and external dictionaries (Evgeniy Ivanov). Support for `UUID` is still incomplete.
-* Fixed `SummingMergeTree` behavior in cases when the rows summed to zero.
-* Various fixes for the `Kafka` engine (Marek Vavruša).
-* Fixed incorrect behavior of the `Join` table engine (Amos Bird).
-* Fixed incorrect allocator behavior under FreeBSD and OS X.
-* The `extractAll` function now supports empty matches.
-* Fixed an error that blocked usage of `libressl` instead of `openssl`.
-* Fixed the `CREATE TABLE AS SELECT` query from temporary tables.
-* Fixed non-atomicity of updating the replication queue. This could lead to replicas being out of sync until the server restarts.
-* Fixed possible overflow in `gcd`, `lcm`, and `modulo` (`%` operator) (Maks Skorokhod).
-* `-preprocessed` files are now created after changing `umask` (`umask` can be changed in the config).
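An illustrative use of the `to*Number*OrNull` conversions listed above; `toInt32OrNull` and `toFloat64OrNull` are two members of the family:

```sql
SELECT
    toInt32OrNull('123')    AS ok,
    toInt32OrNull('oops')   AS bad_returns_null,
    toFloat64OrNull('3.14') AS float_ok;
```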
-* Fixed a bug in the background check of parts (`MergeTreePartChecker` ) when using a custom partition key. -* Fixed parsing of tuples (values of the `Tuple` data type) in text formats. -* Improved error messages about incompatible types passed to `multiIf` , `array` and some other functions. -* Redesigned support for `Nullable` types. Fixed bugs that may lead to a server crash. Fixed almost all other bugs related to ` NULL` support: incorrect type conversions in INSERT SELECT, insufficient support for Nullable in HAVING and PREWHERE, `join_use_nulls` mode, Nullable types as arguments of `OR` operator, etc. -* Fixed various bugs related to internal semantics of data types. Examples: unnecessary summing of `Enum` type fields in `SummingMergeTree` ; alignment of `Enum` types in `Pretty` formats, etc. -* Stricter checks for allowed combinations of composite columns. -* Fixed the overflow when specifying a very large parameter for the `FixedString` data type. -* Fixed a bug in the `topK` aggregate function in a generic case. -* Added the missing check for equality of array sizes in arguments of n-ary variants of aggregate functions with an `-Array` combinator. -* Fixed a bug in `--pager` for `clickhouse-client` (author: ks1322). -* Fixed the precision of the `exp10` function. -* Fixed the behavior of the `visitParamExtract` function for better compliance with documentation. -* Fixed the crash when incorrect data types are specified. -* Fixed the behavior of `DISTINCT` in the case when all columns are constants. -* Fixed query formatting in the case of using the `tupleElement` function with a complex constant expression as the tuple element index. -* Fixed a bug in `Dictionary` tables for `range_hashed` dictionaries. -* Fixed a bug that leads to excessive rows in the result of `FULL` and ` RIGHT JOIN` (Amos Bird). -* Fixed a server crash when creating and removing temporary files in `config.d` directories during config reload. -* Fixed the ` SYSTEM DROP DNS CACHE` query: the cache was flushed but addresses of cluster nodes were not updated. -* Fixed the behavior of ` MATERIALIZED VIEW` after executing ` DETACH TABLE` for the table under the view (Marek Vavruša). - -### Build improvements: - -* The `pbuilder` tool is used for builds. The build process is almost completely independent of the build host environment. -* A single build is used for different OS versions. Packages and binaries have been made compatible with a wide range of Linux systems. -* Added the `clickhouse-test` package. It can be used to run functional tests. -* The source tarball can now be published to the repository. It can be used to reproduce the build without using GitHub. -* Added limited integration with Travis CI. Due to limits on build time in Travis, only the debug build is tested and a limited subset of tests are run. -* Added support for `Cap'n'Proto` in the default build. -* Changed the format of documentation sources from `Restricted Text` to `Markdown`. -* Added support for `systemd` (Vladimir Smirnov). It is disabled by default due to incompatibility with some OS images and can be enabled manually. -* For dynamic code generation, `clang` and `lld` are embedded into the `clickhouse` binary. They can also be invoked as ` clickhouse clang` and ` clickhouse lld` . -* Removed usage of GNU extensions from the code. Enabled the `-Wextra` option. When building with `clang` the default is `libc++` instead of `libstdc++`. 
-* Extracted `clickhouse_parsers` and `clickhouse_common_io` libraries to speed up builds of various tools. - -### Backward incompatible changes: - -* The format for marks in `Log` type tables that contain `Nullable` columns was changed in a backward incompatible way. If you have these tables, you should convert them to the `TinyLog` type before starting up the new server version. To do this, replace `ENGINE = Log` with `ENGINE = TinyLog` in the corresponding `.sql` file in the `metadata` directory. If your table doesn't have `Nullable` columns or if the type of your table is not `Log`, then you don't need to do anything. -* Removed the `experimental_allow_extended_storage_definition_syntax` setting. Now this feature is enabled by default. -* The `runningIncome` function was renamed to `runningDifferenceStartingWithFirstvalue` to avoid confusion. -* Removed the ` FROM ARRAY JOIN arr` syntax when ARRAY JOIN is specified directly after FROM with no table (Amos Bird). -* Removed the `BlockTabSeparated` format that was used solely for demonstration purposes. -* Changed the state format for aggregate functions `varSamp`, `varPop`, `stddevSamp`, `stddevPop`, `covarSamp`, `covarPop`, `corr`. If you have stored states of these aggregate functions in tables (using the `AggregateFunction` data type or materialized views with corresponding states), please write to clickhouse-feedback@yandex-team.com. -* In previous server versions there was an undocumented feature: if an aggregate function depends on parameters, you can still specify it without parameters in the AggregateFunction data type. Example: `AggregateFunction(quantiles, UInt64)` instead of `AggregateFunction(quantiles(0.5, 0.9), UInt64)`. This feature was lost. Although it was undocumented, we plan to support it again in future releases. -* Enum data types cannot be used in min/max aggregate functions. This ability will be returned in the next release. - -### Please note when upgrading: - -* When doing a rolling update on a cluster, at the point when some of the replicas are running the old version of ClickHouse and some are running the new version, replication is temporarily stopped and the message ` unknown parameter 'shard'` appears in the log. Replication will continue after all replicas of the cluster are updated. -* If different versions of ClickHouse are running on the cluster servers, it is possible that distributed queries using the following functions will have incorrect results: `varSamp`, `varPop`, `stddevSamp`, `stddevPop`, `covarSamp`, `covarPop`, `corr`. You should update all cluster nodes. - -## ClickHouse release 1.1.54327, 2017-12-21 - -This release contains bug fixes for the previous release 1.1.54318: - -* Fixed bug with possible race condition in replication that could lead to data loss. This issue affects versions 1.1.54310 and 1.1.54318. If you use one of these versions with Replicated tables, the update is strongly recommended. This issue shows in logs in Warning messages like ` Part ... from own log doesn't exist.` The issue is relevant even if you don't see these messages in logs. 
- -## ClickHouse release 1.1.54318, 2017-11-30 - -This release contains bug fixes for the previous release 1.1.54310: - -* Fixed incorrect row deletions during merges in the SummingMergeTree engine -* Fixed a memory leak in unreplicated MergeTree engines -* Fixed performance degradation with frequent inserts in MergeTree engines -* Fixed an issue that was causing the replication queue to stop running -* Fixed rotation and archiving of server logs - -## ClickHouse release 1.1.54310, 2017-11-01 - -### New features: - -* Custom partitioning key for the MergeTree family of table engines. -* [ Kafka](https://clickhouse.yandex/docs/en/single/index.html#document-table_engines/kafka) table engine. -* Added support for loading [CatBoost](https://catboost.yandex/) models and applying them to data stored in ClickHouse. -* Added support for time zones with non-integer offsets from UTC. -* Added support for arithmetic operations with time intervals. -* The range of values for the Date and DateTime types is extended to the year 2105. -* Added the ` CREATE MATERIALIZED VIEW x TO y` query (specifies an existing table for storing the data of a materialized view). -* Added the `ATTACH TABLE` query without arguments. -* The processing logic for Nested columns with names ending in -Map in a SummingMergeTree table was extracted to the sumMap aggregate function. You can now specify such columns explicitly. -* Max size of the IP trie dictionary is increased to 128M entries. -* Added the getSizeOfEnumType function. -* Added the sumWithOverflow aggregate function. -* Added support for the Cap'n Proto input format. -* You can now customize compression level when using the zstd algorithm. - -### Backward incompatible changes: - -* Creation of temporary tables with an engine other than Memory is not allowed. -* Explicit creation of tables with the View or MaterializedView engine is not allowed. -* During table creation, a new check verifies that the sampling key expression is included in the primary key. - -### Bug fixes: - -* Fixed hangups when synchronously inserting into a Distributed table. -* Fixed nonatomic adding and removing of parts in Replicated tables. -* Data inserted into a materialized view is not subjected to unnecessary deduplication. -* Executing a query to a Distributed table for which the local replica is lagging and remote replicas are unavailable does not result in an error anymore. -* Users don't need access permissions to the `default` database to create temporary tables anymore. -* Fixed crashing when specifying the Array type without arguments. -* Fixed hangups when the disk volume containing server logs is full. -* Fixed an overflow in the toRelativeWeekNum function for the first week of the Unix epoch. - -### Build improvements: - -* Several third-party libraries (notably Poco) were updated and converted to git submodules. - -## ClickHouse release 1.1.54304, 2017-10-19 - -### New features: - -* TLS support in the native protocol (to enable, set `tcp_ssl_port` in `config.xml` ). - -### Bug fixes: - -* `ALTER` for replicated tables now tries to start running as soon as possible. -* Fixed crashing when reading data with the setting `preferred_block_size_bytes=0.` -* Fixed crashes of `clickhouse-client` when pressing ` Page Down` -* Correct interpretation of certain complex queries with `GLOBAL IN` and `UNION ALL` -* `FREEZE PARTITION` always works atomically now. -* Empty POST requests now return a response with code 411. 
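Two of the 1.1.54310 features above, sketched together with hypothetical table names: a custom partitioning key for MergeTree, and `CREATE MATERIALIZED VIEW ... TO`, which stores the view's rows in an existing table.

```sql
CREATE TABLE events
(
    EventDate Date,
    UserID    UInt64,
    Value     Float64
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(EventDate)   -- custom partitioning key
ORDER BY (EventDate, UserID);

CREATE TABLE events_daily
(
    EventDate Date,
    total     Float64
)
ENGINE = SummingMergeTree
PARTITION BY toYYYYMM(EventDate)
ORDER BY EventDate;

-- The view writes into the pre-existing events_daily table instead of an implicit inner table.
CREATE MATERIALIZED VIEW events_daily_mv TO events_daily
AS SELECT EventDate, sum(Value) AS total FROM events GROUP BY EventDate;
```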
-* Fixed interpretation errors for expressions like `CAST(1 AS Nullable(UInt8)).` -* Fixed an error when reading `Array(Nullable(String))` columns from `MergeTree` tables. -* Fixed crashing when parsing queries like `SELECT dummy AS dummy, dummy AS b` -* Users are updated correctly with invalid `users.xml` -* Correct handling when an executable dictionary returns a non-zero response code. - -## ClickHouse release 1.1.54292, 2017-09-20 - -### New features: - -* Added the `pointInPolygon` function for working with coordinates on a coordinate plane. -* Added the `sumMap` aggregate function for calculating the sum of arrays, similar to `SummingMergeTree`. -* Added the `trunc` function. Improved performance of the rounding functions (`round`, `floor`, `ceil`, `roundToExp2`) and corrected the logic of how they work. Changed the logic of the `roundToExp2` function for fractions and negative numbers. -* The ClickHouse executable file is now less dependent on the libc version. The same ClickHouse executable file can run on a wide variety of Linux systems. There is still a dependency when using compiled queries (with the setting ` compile = 1` , which is not used by default). -* Reduced the time needed for dynamic compilation of queries. - -### Bug fixes: - -* Fixed an error that sometimes produced ` part ... intersects previous part` messages and weakened replica consistency. -* Fixed an error that caused the server to lock up if ZooKeeper was unavailable during shutdown. -* Removed excessive logging when restoring replicas. -* Fixed an error in the UNION ALL implementation. -* Fixed an error in the concat function that occurred if the first column in a block has the Array type. -* Progress is now displayed correctly in the system.merges table. - -## ClickHouse release 1.1.54289, 2017-09-13 - -### New features: - -* `SYSTEM` queries for server administration: `SYSTEM RELOAD DICTIONARY`, `SYSTEM RELOAD DICTIONARIES`, `SYSTEM DROP DNS CACHE`, `SYSTEM SHUTDOWN`, `SYSTEM KILL`. -* Added functions for working with arrays: `concat`, `arraySlice`, `arrayPushBack`, `arrayPushFront`, `arrayPopBack`, `arrayPopFront`. -* Added `root` and `identity` parameters for the ZooKeeper configuration. This allows you to isolate individual users on the same ZooKeeper cluster. -* Added aggregate functions `groupBitAnd`, `groupBitOr`, and `groupBitXor` (for compatibility, they are also available under the names `BIT_AND`, `BIT_OR`, and `BIT_XOR`). -* External dictionaries can be loaded from MySQL by specifying a socket in the filesystem. -* External dictionaries can be loaded from MySQL over SSL (`ssl_cert`, `ssl_key`, `ssl_ca` parameters). -* Added the `max_network_bandwidth_for_user` setting to restrict the overall bandwidth use for queries per user. -* Support for `DROP TABLE` for temporary tables. -* Support for reading `DateTime` values in Unix timestamp format from the `CSV` and `JSONEachRow` formats. -* Lagging replicas in distributed queries are now excluded by default (the default threshold is 5 minutes). -* FIFO locking is used during ALTER: an ALTER query isn't blocked indefinitely for continuously running queries. -* Option to set `umask` in the config file. -* Improved performance for queries with `DISTINCT` . - -### Bug fixes: - -* Improved the process for deleting old nodes in ZooKeeper. Previously, old nodes sometimes didn't get deleted if there were very frequent inserts, which caused the server to be slow to shut down, among other things. 
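A quick sketch of the array helpers added in 1.1.54289; constant arrays are used so the query is self-contained:

```sql
SELECT
    arrayPushBack([1, 2, 3], 4)       AS pushed,   -- [1, 2, 3, 4]
    arrayPopFront([1, 2, 3])          AS popped,   -- [2, 3]
    arraySlice([1, 2, 3, 4, 5], 2, 3) AS sliced;   -- [2, 3, 4]
```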
-* Fixed randomization when choosing hosts for the connection to ZooKeeper. -* Fixed the exclusion of lagging replicas in distributed queries if the replica is localhost. -* Fixed an error where a data part in a `ReplicatedMergeTree` table could be broken after running ` ALTER MODIFY` on an element in a `Nested` structure. -* Fixed an error that could cause SELECT queries to "hang". -* Improvements to distributed DDL queries. -* Fixed the query `CREATE TABLE ... AS `. -* Resolved the deadlock in the ` ALTER ... CLEAR COLUMN IN PARTITION` query for `Buffer` tables. -* Fixed the invalid default value for `Enum` s (0 instead of the minimum) when using the `JSONEachRow` and `TSKV` formats. -* Resolved the appearance of zombie processes when using a dictionary with an `executable` source. -* Fixed segfault for the HEAD query. - -### Improved workflow for developing and assembling ClickHouse: - -* You can use `pbuilder` to build ClickHouse. -* You can use `libc++` instead of `libstdc++` for builds on Linux. -* Added instructions for using static code analysis tools: `Coverage`, `clang-tidy`, `cppcheck`. - -### Please note when upgrading: - -* There is now a higher default value for the MergeTree setting `max_bytes_to_merge_at_max_space_in_pool` (the maximum total size of data parts to merge, in bytes): it has increased from 100 GiB to 150 GiB. This might result in large merges running after the server upgrade, which could cause an increased load on the disk subsystem. If the free space available on the server is less than twice the total amount of the merges that are running, this will cause all other merges to stop running, including merges of small data parts. As a result, INSERT requests will fail with the message "Merges are processing significantly slower than inserts." Use the ` SELECT * FROM system.merges` request to monitor the situation. You can also check the `DiskSpaceReservedForMerge` metric in the `system.metrics` table, or in Graphite. You don't need to do anything to fix this, since the issue will resolve itself once the large merges finish. If you find this unacceptable, you can restore the previous value for the `max_bytes_to_merge_at_max_space_in_pool` setting. To do this, go to the section in config.xml, set ```107374182400` and restart the server. - -## ClickHouse release 1.1.54284, 2017-08-29 - -* This is a bugfix release for the previous 1.1.54282 release. It fixes leaks in the parts directory in ZooKeeper. - -## ClickHouse release 1.1.54282, 2017-08-23 - -This release contains bug fixes for the previous release 1.1.54276: - -* Fixed `DB::Exception: Assertion violation: !_path.empty()` when inserting into a Distributed table. -* Fixed parsing when inserting in RowBinary format if input data starts with';'. -* Errors during runtime compilation of certain aggregate functions (e.g. `groupArray()`). - -## Clickhouse Release 1.1.54276, 2017-08-16 - -### New features: - -* Added an optional WITH section for a SELECT query. Example query: `WITH 1+1 AS a SELECT a, a*a` -* INSERT can be performed synchronously in a Distributed table: OK is returned only after all the data is saved on all the shards. This is activated by the setting insert_distributed_sync=1. -* Added the UUID data type for working with 16-byte identifiers. -* Added aliases of CHAR, FLOAT and other types for compatibility with the Tableau. -* Added the functions toYYYYMM, toYYYYMMDD, and toYYYYMMDDhhmmss for converting time into numbers. 
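The optional `WITH` section and the `toYYYYMM*` helpers from 1.1.54276, combined in one illustrative query:

```sql
WITH 1 + 1 AS a
SELECT
    a,
    a * a                                               AS a_squared,
    toYYYYMM(toDate('2017-08-16'))                      AS ym,       -- 201708
    toYYYYMMDDhhmmss(toDateTime('2017-08-16 12:34:56')) AS ymdhms;   -- 20170816123456
```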
-* You can use IP addresses (together with the hostname) to identify servers for clustered DDL queries. -* Added support for non-constant arguments and negative offsets in the function `substring(str, pos, len).` -* Added the max_size parameter for the `groupArray(max_size)(column)` aggregate function, and optimized its performance. - -### Main changes: - -* Security improvements: all server files are created with 0640 permissions (can be changed via config parameter). -* Improved error messages for queries with invalid syntax. -* Significantly reduced memory consumption and improved performance when merging large sections of MergeTree data. -* Significantly increased the performance of data merges for the ReplacingMergeTree engine. -* Improved performance for asynchronous inserts from a Distributed table by combining multiple source inserts. To enable this functionality, use the setting distributed_directory_monitor_batch_inserts=1. - -### Backward incompatible changes: - -* Changed the binary format of aggregate states of `groupArray(array_column)` functions for arrays. - -### Complete list of changes: - -* Added the `output_format_json_quote_denormals` setting, which enables outputting nan and inf values in JSON format. -* Optimized stream allocation when reading from a Distributed table. -* Settings can be configured in readonly mode if the value doesn't change. -* Added the ability to retrieve non-integer granules of the MergeTree engine in order to meet restrictions on the block size specified in the preferred_block_size_bytes setting. The purpose is to reduce the consumption of RAM and increase cache locality when processing queries from tables with large columns. -* Efficient use of indexes that contain expressions like `toStartOfHour(x)` for conditions like `toStartOfHour(x) op сonstexpr.` -* Added new settings for MergeTree engines (the merge_tree section in config.xml): - - replicated_deduplication_window_seconds sets the number of seconds allowed for deduplicating inserts in Replicated tables. - - cleanup_delay_period sets how often to start cleanup to remove outdated data. - - replicated_can_become_leader can prevent a replica from becoming the leader (and assigning merges). -* Accelerated cleanup to remove outdated data from ZooKeeper. -* Multiple improvements and fixes for clustered DDL queries. Of particular interest is the new setting distributed_ddl_task_timeout, which limits the time to wait for a response from the servers in the cluster. -* Improved display of stack traces in the server logs. -* Added the "none" value for the compression method. -* You can use multiple dictionaries_config sections in config.xml. -* It is possible to connect to MySQL through a socket in the file system. -* The system.parts table has a new column with information about the size of marks, in bytes. - -### Bug fixes: - -* Distributed tables using a Merge table now work correctly for a SELECT query with a condition on the `_table` field. -* Fixed a rare race condition in ReplicatedMergeTree when checking data parts. -* Fixed possible freezing on "leader election" when starting a server. -* The max_replica_delay_for_distributed_queries setting was ignored when using a local replica of the data source. This has been fixed. -* Fixed incorrect behavior of `ALTER TABLE CLEAR COLUMN IN PARTITION` when attempting to clean a non-existing column. -* Fixed an exception in the multiIf function when using empty arrays or strings. -* Fixed excessive memory allocations when deserializing Native format. 
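A sketch of the `output_format_json_quote_denormals` setting mentioned above; `/` performs floating-point division in ClickHouse, so the values below really are `inf` and `nan`:

```sql
SET output_format_json_quote_denormals = 1;
SELECT 1 / 0 AS inf_value, 0 / 0 AS nan_value
FORMAT JSON;   -- with the setting enabled, the values are emitted as "inf" and "nan" instead of null
```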
-* Fixed incorrect auto-update of Trie dictionaries. -* Fixed an exception when running queries with a GROUP BY clause from a Merge table when using SAMPLE. -* Fixed a crash of GROUP BY when using distributed_aggregation_memory_efficient=1. -* Now you can specify the database.table in the right side of IN and JOIN. -* Too many threads were used for parallel aggregation. This has been fixed. -* Fixed how the "if" function works with FixedString arguments. -* SELECT worked incorrectly from a Distributed table for shards with a weight of 0. This has been fixed. -* Running `CREATE VIEW IF EXISTS no longer causes crashes.` -* Fixed incorrect behavior when input_format_skip_unknown_fields=1 is set and there are negative numbers. -* Fixed an infinite loop in the `dictGetHierarchy()` function if there is some invalid data in the dictionary. -* Fixed `Syntax error: unexpected (...)` errors when running distributed queries with subqueries in an IN or JOIN clause and Merge tables. -* Fixed an incorrect interpretation of a SELECT query from Dictionary tables. -* Fixed the "Cannot mremap" error when using arrays in IN and JOIN clauses with more than 2 billion elements. -* Fixed the failover for dictionaries with MySQL as the source. - -### Improved workflow for developing and assembling ClickHouse: - -* Builds can be assembled in Arcadia. -* You can use gcc 7 to compile ClickHouse. -* Parallel builds using ccache+distcc are faster now. - -## ClickHouse release 1.1.54245, 2017-07-04 - -### New features: - -* Distributed DDL (for example, `CREATE TABLE ON CLUSTER`) -* The replicated request `ALTER TABLE CLEAR COLUMN IN PARTITION.` -* The engine for Dictionary tables (access to dictionary data in the form of a table). -* Dictionary database engine (this type of database automatically has Dictionary tables available for all the connected external dictionaries). -* You can check for updates to the dictionary by sending a request to the source. -* Qualified column names -* Quoting identifiers using double quotation marks. -* Sessions in the HTTP interface. -* The OPTIMIZE query for a Replicated table can can run not only on the leader. - -### Backward incompatible changes: - -* Removed SET GLOBAL. - -### Minor changes: - -* Now after an alert is triggered, the log prints the full stack trace. -* Relaxed the verification of the number of damaged/extra data parts at startup (there were too many false positives). - -### Bug fixes: - -* Fixed a bad connection "sticking" when inserting into a Distributed table. -* GLOBAL IN now works for a query from a Merge table that looks at a Distributed table. -* The incorrect number of cores was detected on a Google Compute Engine virtual machine. This has been fixed. -* Changes in how an executable source of cached external dictionaries works. -* Fixed the comparison of strings containing null characters. -* Fixed the comparison of Float32 primary key fields with constants. -* Previously, an incorrect estimate of the size of a field could lead to overly large allocations. -* Fixed a crash when querying a Nullable column added to a table using ALTER. -* Fixed a crash when sorting by a Nullable column, if the number of rows is less than LIMIT. -* Fixed an ORDER BY subquery consisting of only constant values. -* Previously, a Replicated table could remain in the invalid state after a failed DROP TABLE. -* Aliases for scalar subqueries with empty results are no longer lost. -* Now a query that used compilation does not fail with an error if the .so file gets damaged. 
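A sketch of distributed DDL and the replicated `ALTER TABLE ... CLEAR COLUMN IN PARTITION` request from 1.1.54245; the cluster, table, column, and partition names are hypothetical, and the old-style MergeTree definition is used for the era:

```sql
-- Distributed DDL: run once, executed on every host of the (hypothetical) cluster 'my_cluster'.
CREATE TABLE metrics ON CLUSTER my_cluster (d Date, v Float64) ENGINE = MergeTree(d, (d), 8192);

-- Replicated ALTER that resets a column's data in one partition on all replicas.
ALTER TABLE metrics CLEAR COLUMN v IN PARTITION '201707';
```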
+## ClickHouse release 18.14.15, 2018-11-21 + +### Bug fixes: +* The size of memory chunk was overestimated while deserializing the column of type `Array(String)` that leads to "Memory limit exceeded" errors. The issue appeared in version 18.12.13. [#3589](https://github.com/yandex/ClickHouse/issues/3589) + +## ClickHouse release 18.14.14, 2018-11-20 + +### Bug fixes: +* Fixed `ON CLUSTER` queries when cluster configured as secure (flag ``). [#3599](https://github.com/yandex/ClickHouse/pull/3599) + +### Build changes: +* Fixed problems (llvm-7 from system, macos) [#3582](https://github.com/yandex/ClickHouse/pull/3582) + +## ClickHouse release 18.14.11, 2018-10-29 + +### Bug fixes: + +* Fixed the error `Block structure mismatch in UNION stream: different number of columns` in LIMIT queries. [#2156](https://github.com/yandex/ClickHouse/issues/2156) +* Fixed errors when merging data in tables containing arrays inside Nested structures. [#3397](https://github.com/yandex/ClickHouse/pull/3397) +* Fixed incorrect query results if the `merge_tree_uniform_read_distribution` setting is disabled (it is enabled by default). [#3429](https://github.com/yandex/ClickHouse/pull/3429) +* Fixed an error on inserts to a Distributed table in Native format. [#3411](https://github.com/yandex/ClickHouse/issues/3411) + +## ClickHouse release 18.14.10, 2018-10-23 + +* The `compile_expressions` setting (JIT compilation of expressions) is disabled by default. [#3410](https://github.com/yandex/ClickHouse/pull/3410) +* The `enable_optimize_predicate_expression` setting is disabled by default. + +## ClickHouse release 18.14.9, 2018-10-16 + +### New features: + +* The `WITH CUBE` modifier for `GROUP BY` (the alternative syntax `GROUP BY CUBE(...)` is also available). [#3172](https://github.com/yandex/ClickHouse/pull/3172) +* Added the `formatDateTime` function. [Alexandr Krasheninnikov](https://github.com/yandex/ClickHouse/pull/2770) +* Added the `JDBC` table engine and `jdbc` table function (requires installing clickhouse-jdbc-bridge). [Alexandr Krasheninnikov](https://github.com/yandex/ClickHouse/pull/3210) +* Added functions for working with the ISO week number: `toISOWeek`, `toISOYear`, `toStartOfISOYear`, and `toDayOfYear`. [#3146](https://github.com/yandex/ClickHouse/pull/3146) +* Now you can use `Nullable` columns for `MySQL` and `ODBC` tables. [#3362](https://github.com/yandex/ClickHouse/pull/3362) +* Nested data structures can be read as nested objects in `JSONEachRow` format. Added the `input_format_import_nested_json` setting. [Veloman Yunkan](https://github.com/yandex/ClickHouse/pull/3144) +* Parallel processing is available for many `MATERIALIZED VIEW`s when inserting data. See the `parallel_view_processing` setting. [Marek Vavruša](https://github.com/yandex/ClickHouse/pull/3208) +* Added the `SYSTEM FLUSH LOGS` query (forced log flushes to system tables such as `query_log`) [#3321](https://github.com/yandex/ClickHouse/pull/3321) +* Now you can use pre-defined `database` and `table` macros when declaring `Replicated` tables. [#3251](https://github.com/yandex/ClickHouse/pull/3251) +* Added the ability to read `Decimal` type values in engineering notation (indicating powers of ten). 
[#3153](https://github.com/yandex/ClickHouse/pull/3153) + +### Experimental features: + +* Optimization of the GROUP BY clause for `LowCardinality data types.` [#3138](https://github.com/yandex/ClickHouse/pull/3138) +* Optimized calculation of expressions for `LowCardinality data types.` [#3200](https://github.com/yandex/ClickHouse/pull/3200) + +### Improvements: + +* Significantly reduced memory consumption for requests with `ORDER BY` and `LIMIT`. See the `max_bytes_before_remerge_sort` setting. [#3205](https://github.com/yandex/ClickHouse/pull/3205) +* In the absence of `JOIN` (`LEFT`, `INNER`, ...), `INNER JOIN` is assumed. [#3147](https://github.com/yandex/ClickHouse/pull/3147) +* Qualified asterisks work correctly in queries with `JOIN`. [Winter Zhang](https://github.com/yandex/ClickHouse/pull/3202) +* The `ODBC` table engine correctly chooses the method for quoting identifiers in the SQL dialect of a remote database. [Alexandr Krasheninnikov](https://github.com/yandex/ClickHouse/pull/3210) +* The `compile_expressions` setting (JIT compilation of expressions) is enabled by default. +* Fixed behavior for simultaneous DROP DATABASE/TABLE IF EXISTS and CREATE DATABASE/TABLE IF NOT EXISTS. Previously, a `CREATE DATABASE ... IF NOT EXISTS` query could return the error message "File ... already exists", and the `CREATE TABLE ... IF NOT EXISTS` and `DROP TABLE IF EXISTS` queries could return `Table ... is creating or attaching right now`. [#3101](https://github.com/yandex/ClickHouse/pull/3101) +* LIKE and IN expressions with a constant right half are passed to the remote server when querying from MySQL or ODBC tables. [#3182](https://github.com/yandex/ClickHouse/pull/3182) +* Comparisons with constant expressions in a WHERE clause are passed to the remote server when querying from MySQL and ODBC tables. Previously, only comparisons with constants were passed. [#3182](https://github.com/yandex/ClickHouse/pull/3182) +* Correct calculation of row width in the terminal for `Pretty` formats, including strings with hieroglyphs. [Amos Bird](https://github.com/yandex/ClickHouse/pull/3257). +* `ON CLUSTER` can be specified for `ALTER UPDATE` queries. +* Improved performance for reading data in `JSONEachRow` format. [#3332](https://github.com/yandex/ClickHouse/pull/3332) +* Added synonyms for the `LENGTH` and `CHARACTER_LENGTH` functions for compatibility. The `CONCAT` function is no longer case-sensitive. [#3306](https://github.com/yandex/ClickHouse/pull/3306) +* Added the `TIMESTAMP` synonym for the `DateTime` type. [#3390](https://github.com/yandex/ClickHouse/pull/3390) +* There is always space reserved for query_id in the server logs, even if the log line is not related to a query. This makes it easier to parse server text logs with third-party tools. +* Memory consumption by a query is logged when it exceeds the next level of an integer number of gigabytes. [#3205](https://github.com/yandex/ClickHouse/pull/3205) +* Added compatibility mode for the case when the client library that uses the Native protocol sends fewer columns by mistake than the server expects for the INSERT query. This scenario was possible when using the clickhouse-cpp library. Previously, this scenario caused the server to crash. [#3171](https://github.com/yandex/ClickHouse/pull/3171) +* In a user-defined WHERE expression in `clickhouse-copier`, you can now use a `partition_key` alias (for additional filtering by source table partition). 
This is useful if the partitioning scheme changes during copying, but only changes slightly. [#3166](https://github.com/yandex/ClickHouse/pull/3166) +* The workflow of the `Kafka` engine has been moved to a background thread pool in order to automatically reduce the speed of data reading at high loads. [Marek Vavruša](https://github.com/yandex/ClickHouse/pull/3215). +* Support for reading `Tuple` and `Nested` values of structures like `struct` in the `Cap'n'Proto format`. [Marek Vavruša](https://github.com/yandex/ClickHouse/pull/3216) +* The list of top-level domains for the `firstSignificantSubdomain` function now includes the domain `biz`. [decaseal](https://github.com/yandex/ClickHouse/pull/3219) +* In the configuration of external dictionaries, `null_value` is interpreted as the value of the default data type. [#3330](https://github.com/yandex/ClickHouse/pull/3330) +* Support for the `intDiv` and `intDivOrZero` functions for `Decimal`. [b48402e8](https://github.com/yandex/ClickHouse/commit/b48402e8712e2b9b151e0eef8193811d433a1264) +* Support for the `Date`, `DateTime`, `UUID`, and `Decimal` types as a key for the `sumMap` aggregate function. [#3281](https://github.com/yandex/ClickHouse/pull/3281) +* Support for the `Decimal` data type in external dictionaries. [#3324](https://github.com/yandex/ClickHouse/pull/3324) +* Support for the `Decimal` data type in `SummingMergeTree` tables. [#3348](https://github.com/yandex/ClickHouse/pull/3348) +* Added specializations for `UUID` in `if`. [#3366](https://github.com/yandex/ClickHouse/pull/3366) +* Reduced the number of `open` and `close` system calls when reading from a `MergeTree table`. [#3283](https://github.com/yandex/ClickHouse/pull/3283) +* A `TRUNCATE TABLE` query can be executed on any replica (the query is passed to the leader replica). [Kirill Shvakov](https://github.com/yandex/ClickHouse/pull/3375) + +### Bug fixes: + +* Fixed an issue with `Dictionary` tables for `range_hashed` dictionaries. This error occurred in version 18.12.17. [#1702](https://github.com/yandex/ClickHouse/pull/1702) +* Fixed an error when loading `range_hashed` dictionaries (the message `Unsupported type Nullable (...)`). This error occurred in version 18.12.17. [#3362](https://github.com/yandex/ClickHouse/pull/3362) +* Fixed errors in the `pointInPolygon` function due to the accumulation of inaccurate calculations for polygons with a large number of vertices located close to each other. [#3331](https://github.com/yandex/ClickHouse/pull/3331) [#3341](https://github.com/yandex/ClickHouse/pull/3341) +* If after merging data parts, the checksum for the resulting part differs from the result of the same merge in another replica, the result of the merge is deleted and the data part is downloaded from the other replica (this is the correct behavior). But after downloading the data part, it couldn't be added to the working set because of an error that the part already exists (because the data part was deleted with some delay after the merge). This led to cyclical attempts to download the same data. [#3194](https://github.com/yandex/ClickHouse/pull/3194) +* Fixed incorrect calculation of total memory consumption by queries (because of incorrect calculation, the `max_memory_usage_for_all_queries` setting worked incorrectly and the `MemoryTracking` metric had an incorrect value). This error occurred in version 18.12.13. [Marek Vavruša](https://github.com/yandex/ClickHouse/pull/3344) +* Fixed the functionality of `CREATE TABLE ... ON CLUSTER ... 
AS SELECT ...` This error occurred in version 18.12.13. [#3247](https://github.com/yandex/ClickHouse/pull/3247) +* Fixed unnecessary preparation of data structures for `JOIN`s on the server that initiates the request if the `JOIN` is only performed on remote servers. [#3340](https://github.com/yandex/ClickHouse/pull/3340) +* Fixed bugs in the `Kafka` engine: deadlocks after exceptions when starting to read data, and locks upon completion [Marek Vavruša](https://github.com/yandex/ClickHouse/pull/3215). +* For `Kafka` tables, the optional `schema` parameter was not passed (the schema of the `Cap'n'Proto` format). [Vojtech Splichal](https://github.com/yandex/ClickHouse/pull/3150) +* If the ensemble of ZooKeeper servers has servers that accept the connection but then immediately close it instead of responding to the handshake, ClickHouse chooses to connect another server. Previously, this produced the error `Cannot read all data. Bytes read: 0. Bytes expected: 4.` and the server couldn't start. [8218cf3a](https://github.com/yandex/ClickHouse/commit/8218cf3a5f39a43401953769d6d12a0bb8d29da9) +* If the ensemble of ZooKeeper servers contains servers for which the DNS query returns an error, these servers are ignored. [17b8e209](https://github.com/yandex/ClickHouse/commit/17b8e209221061325ad7ba0539f03c6e65f87f29) +* Fixed type conversion between `Date` and `DateTime` when inserting data in the `VALUES` format (if `input_format_values_interpret_expressions = 1`). Previously, the conversion was performed between the numerical value of the number of days in Unix Epoch time and the Unix timestamp, which led to unexpected results. [#3229](https://github.com/yandex/ClickHouse/pull/3229) +* Corrected type conversion between `Decimal` and integer numbers. [#3211](https://github.com/yandex/ClickHouse/pull/3211) +* Fixed errors in the `enable_optimize_predicate_expression` setting. [Winter Zhang](https://github.com/yandex/ClickHouse/pull/3231) +* Fixed a parsing error in CSV format with floating-point numbers if a non-default CSV separator is used, such as `;` [#3155](https://github.com/yandex/ClickHouse/pull/3155) +* Fixed the `arrayCumSumNonNegative` function (it does not accumulate negative values if the accumulator is less than zero). [Aleksey Studnev](https://github.com/yandex/ClickHouse/pull/3163) +* Fixed how `Merge` tables work on top of `Distributed` tables when using `PREWHERE`. [#3165](https://github.com/yandex/ClickHouse/pull/3165) +* Bug fixes in the `ALTER UPDATE` query. +* Fixed bugs in the `odbc` table function that appeared in version 18.12. [#3197](https://github.com/yandex/ClickHouse/pull/3197) +* Fixed the operation of aggregate functions with `StateArray` combinators. [#3188](https://github.com/yandex/ClickHouse/pull/3188) +* Fixed a crash when dividing a `Decimal` value by zero. [69dd6609](https://github.com/yandex/ClickHouse/commit/69dd6609193beb4e7acd3e6ad216eca0ccfb8179) +* Fixed output of types for operations using `Decimal` and integer arguments. [#3224](https://github.com/yandex/ClickHouse/pull/3224) +* Fixed the segfault during `GROUP BY` on `Decimal128`. [3359ba06](https://github.com/yandex/ClickHouse/commit/3359ba06c39fcd05bfdb87d6c64154819621e13a) +* The `log_query_threads` setting (logging information about each thread of query execution) now takes effect only if the `log_queries` option (logging information about queries) is set to 1. 
Since the `log_query_threads` option is enabled by default, information about threads was previously logged even if query logging was disabled. [#3241](https://github.com/yandex/ClickHouse/pull/3241) +* Fixed an error in the distributed operation of the quantiles aggregate function (the error message `Not found column quantile...`). [292a8855](https://github.com/yandex/ClickHouse/commit/292a885533b8e3b41ce8993867069d14cbd5a664) +* Fixed the compatibility problem when working on a cluster of version 18.12.17 servers and older servers at the same time. For distributed queries with GROUP BY keys of both fixed and non-fixed length, if there was a large amount of data to aggregate, the returned data was not always fully aggregated (two different rows contained the same aggregation keys). [#3254](https://github.com/yandex/ClickHouse/pull/3254) +* Fixed handling of substitutions in `clickhouse-performance-test`, if the query contains only part of the substitutions declared in the test. [#3263](https://github.com/yandex/ClickHouse/pull/3263) +* Fixed an error when using `FINAL` with `PREWHERE`. [#3298](https://github.com/yandex/ClickHouse/pull/3298) +* Fixed an error when using `PREWHERE` over columns that were added during `ALTER`. [#3298](https://github.com/yandex/ClickHouse/pull/3298) +* Added a check for the absence of `arrayJoin` for `DEFAULT` and `MATERIALIZED` expressions. Previously, `arrayJoin` led to an error when inserting data. [#3337](https://github.com/yandex/ClickHouse/pull/3337) +* Added a check for the absence of `arrayJoin` in a `PREWHERE` clause. Previously, this led to messages like `Size ... doesn't match` or `Unknown compression method` when executing queries. [#3357](https://github.com/yandex/ClickHouse/pull/3357) +* Fixed segfault that could occur in rare cases after optimization that replaced AND chains from equality evaluations with the corresponding IN expression. [liuyimin-bytedance](https://github.com/yandex/ClickHouse/pull/3339) +* Minor corrections to `clickhouse-benchmark`: previously, client information was not sent to the server; now the number of queries executed is calculated more accurately when shutting down and for limiting the number of iterations. [#3351](https://github.com/yandex/ClickHouse/pull/3351) [#3352](https://github.com/yandex/ClickHouse/pull/3352) + +### Backward incompatible changes: + +* Removed the `allow_experimental_decimal_type` option. The `Decimal` data type is available for default use. [#3329](https://github.com/yandex/ClickHouse/pull/3329) + +## ClickHouse release 18.12.17, 2018-09-16 + +### New features: + +* `invalidate_query` (the ability to specify a query to check whether an external dictionary needs to be updated) is implemented for the `clickhouse` source. [#3126](https://github.com/yandex/ClickHouse/pull/3126) +* Added the ability to use `UInt*`, `Int*`, and `DateTime` data types (along with the `Date` type) as a `range_hashed` external dictionary key that defines the boundaries of ranges. Now `NULL` can be used to designate an open range. [Vasily Nemkov](https://github.com/yandex/ClickHouse/pull/3123) +* The `Decimal` type now supports `var*` and `stddev*` aggregate functions. [#3129](https://github.com/yandex/ClickHouse/pull/3129) +* The `Decimal` type now supports mathematical functions (`exp`, `sin` and so on.) [#3129](https://github.com/yandex/ClickHouse/pull/3129) +* The `system.part_log` table now has the `partition_id` column. 
[#3089](https://github.com/yandex/ClickHouse/pull/3089) + +### Bug fixes: + +* `Merge` now works correctly on `Distributed` tables. [Winter Zhang](https://github.com/yandex/ClickHouse/pull/3159) +* Fixed incompatibility (unnecessary dependency on the `glibc` version) that made it impossible to run ClickHouse on `Ubuntu Precise` and older versions. The incompatibility arose in version 18.12.13. [#3130](https://github.com/yandex/ClickHouse/pull/3130) +* Fixed errors in the `enable_optimize_predicate_expression` setting. [Winter Zhang](https://github.com/yandex/ClickHouse/pull/3107) +* Fixed a minor issue with backwards compatibility that appeared when working with a cluster of replicas on versions earlier than 18.12.13 and simultaneously creating a new replica of a table on a server with a newer version (shown in the message `Can not clone replica, because the ... updated to new ClickHouse version`, which is logical, but shouldn't happen). [#3122](https://github.com/yandex/ClickHouse/pull/3122) + +### Backward incompatible changes: + +* The `enable_optimize_predicate_expression` option is enabled by default (which is rather optimistic). If query analysis errors occur that are related to searching for the column names, set `enable_optimize_predicate_expression` to 0. [Winter Zhang](https://github.com/yandex/ClickHouse/pull/3107) + +## ClickHouse release 18.12.14, 2018-09-13 + +### New features: + +* Added support for `ALTER UPDATE` queries. [#3035](https://github.com/yandex/ClickHouse/pull/3035) +* Added the `allow_ddl` option, which restricts the user's access to DDL queries. [#3104](https://github.com/yandex/ClickHouse/pull/3104) +* Added the `min_merge_bytes_to_use_direct_io` option for `MergeTree` engines, which allows you to set a threshold for the total size of the merge (when above the threshold, data part files will be handled using O_DIRECT). [#3117](https://github.com/yandex/ClickHouse/pull/3117) +* The `system.merges` system table now contains the `partition_id` column. [#3099](https://github.com/yandex/ClickHouse/pull/3099) + +### Improvements + +* If a data part remains unchanged during mutation, it isn't downloaded by replicas. [#3103](https://github.com/yandex/ClickHouse/pull/3103) +* Autocomplete is available for names of settings when working with `clickhouse-client`. [#3106](https://github.com/yandex/ClickHouse/pull/3106) + +### Bug fixes: + +* Added a check for the sizes of arrays that are elements of `Nested` type fields when inserting. [#3118](https://github.com/yandex/ClickHouse/pull/3118) +* Fixed an error updating external dictionaries with the `ODBC` source and `hashed` storage. This error occurred in version 18.12.13. +* Fixed a crash when creating a temporary table from a query with an `IN` condition. [Winter Zhang](https://github.com/yandex/ClickHouse/pull/3098) +* Fixed an error in aggregate functions for arrays that can have `NULL` elements. [Winter Zhang](https://github.com/yandex/ClickHouse/pull/3097) + + +## ClickHouse release 18.12.13, 2018-09-10 + +### New features: + +* Added the `DECIMAL(digits, scale)` data type (`Decimal32(scale)`, `Decimal64(scale)`, `Decimal128(scale)`). To enable it, use the setting `allow_experimental_decimal_type`. [#2846](https://github.com/yandex/ClickHouse/pull/2846) [#2970](https://github.com/yandex/ClickHouse/pull/2970) [#3008](https://github.com/yandex/ClickHouse/pull/3008) [#3047](https://github.com/yandex/ClickHouse/pull/3047) +* New `WITH ROLLUP` modifier for `GROUP BY` (alternative syntax: `GROUP BY ROLLUP(...)`). 
[#2948](https://github.com/yandex/ClickHouse/pull/2948) +* In requests with JOIN, the star character expands to a list of columns in all tables, in compliance with the SQL standard. You can restore the old behavior by setting `asterisk_left_columns_only` to 1 on the user configuration level. [Winter Zhang](https://github.com/yandex/ClickHouse/pull/2787) +* Added support for JOIN with table functions. [Winter Zhang](https://github.com/yandex/ClickHouse/pull/2907) +* Autocomplete by pressing Tab in clickhouse-client. [Sergey Shcherbin](https://github.com/yandex/ClickHouse/pull/2447) +* Ctrl+C in clickhouse-client clears a query that was entered. [#2877](https://github.com/yandex/ClickHouse/pull/2877) +* Added the `join_default_strictness` setting (values: `"`, `'any'`, `'all'`). This allows you to not specify `ANY` or `ALL` for `JOIN`. [#2982](https://github.com/yandex/ClickHouse/pull/2982) +* Each line of the server log related to query processing shows the query ID. [#2482](https://github.com/yandex/ClickHouse/pull/2482) +* Now you can get query execution logs in clickhouse-client (use the `send_logs_level` setting). With distributed query processing, logs are cascaded from all the servers. [#2482](https://github.com/yandex/ClickHouse/pull/2482) +* The `system.query_log` and `system.processes` (`SHOW PROCESSLIST`) tables now have information about all changed settings when you run a query (the nested structure of the `Settings` data). Added the `log_query_settings` setting. [#2482](https://github.com/yandex/ClickHouse/pull/2482) +* The `system.query_log` and `system.processes` tables now show information about the number of threads that are participating in query execution (see the `thread_numbers` column). [#2482](https://github.com/yandex/ClickHouse/pull/2482) +* Added `ProfileEvents` counters that measure the time spent on reading and writing over the network and reading and writing to disk, the number of network errors, and the time spent waiting when network bandwidth is limited. [#2482](https://github.com/yandex/ClickHouse/pull/2482) +* Added `ProfileEvents`counters that contain the system metrics from rusage (you can use them to get information about CPU usage in userspace and the kernel, page faults, and context switches), as well as taskstats metrics (use these to obtain information about I/O wait time, CPU wait time, and the amount of data read and recorded, both with and without page cache). [#2482](https://github.com/yandex/ClickHouse/pull/2482) +* The `ProfileEvents` counters are applied globally and for each query, as well as for each query execution thread, which allows you to profile resource consumption by query in detail. [#2482](https://github.com/yandex/ClickHouse/pull/2482) +* Added the `system.query_thread_log` table, which contains information about each query execution thread. Added the `log_query_threads` setting. [#2482](https://github.com/yandex/ClickHouse/pull/2482) +* The `system.metrics` and `system.events` tables now have built-in documentation. [#3016](https://github.com/yandex/ClickHouse/pull/3016) +* Added the `arrayEnumerateDense` function. [Amos Bird](https://github.com/yandex/ClickHouse/pull/2975) +* Added the `arrayCumSumNonNegative` and `arrayDifference` functions. [Aleksey Studnev](https://github.com/yandex/ClickHouse/pull/2942) +* Added the `retention` aggregate function. 
[Sundy Li](https://github.com/yandex/ClickHouse/pull/2887) +* Now you can add (merge) states of aggregate functions by using the plus operator, and multiply the states of aggregate functions by a nonnegative constant. [#3062](https://github.com/yandex/ClickHouse/pull/3062) [#3034](https://github.com/yandex/ClickHouse/pull/3034) +* Tables in the MergeTree family now have the virtual column `_partition_id`. [#3089](https://github.com/yandex/ClickHouse/pull/3089) + +### Experimental features: + +* Added the `LowCardinality(T)` data type. This data type automatically creates a local dictionary of values and allows data processing without unpacking the dictionary. [#2830](https://github.com/yandex/ClickHouse/pull/2830) +* Added a cache of JIT-compiled functions and a counter for the number of uses before compiling. To JIT compile expressions, enable the `compile_expressions` setting. [#2990](https://github.com/yandex/ClickHouse/pull/2990) [#3077](https://github.com/yandex/ClickHouse/pull/3077) + +### Improvements: + +* Fixed the problem with unlimited accumulation of the replication log when there are abandoned replicas. Added an effective recovery mode for replicas with a long lag. +* Improved performance of `GROUP BY` with multiple aggregation fields when one of them is string and the others are fixed length. +* Improved performance when using `PREWHERE` and with implicit transfer of expressions in `PREWHERE`. +* Improved parsing performance for text formats (`CSV`, `TSV`). [Amos Bird](https://github.com/yandex/ClickHouse/pull/2977) [#2980](https://github.com/yandex/ClickHouse/pull/2980) +* Improved performance of reading strings and arrays in binary formats. [Amos Bird](https://github.com/yandex/ClickHouse/pull/2955) +* Increased performance and reduced memory consumption for queries to `system.tables` and `system.columns` when there is a very large number of tables on a single server. [#2953](https://github.com/yandex/ClickHouse/pull/2953) +* Fixed a performance problem in the case of a large stream of queries that result in an error (the ` _dl_addr` function is visible in `perf top`, but the server isn't using much CPU). [#2938](https://github.com/yandex/ClickHouse/pull/2938) +* Conditions are cast into the View (when `enable_optimize_predicate_expression` is enabled). [Winter Zhang](https://github.com/yandex/ClickHouse/pull/2907) +* Improvements to the functionality for the `UUID` data type. [#3074](https://github.com/yandex/ClickHouse/pull/3074) [#2985](https://github.com/yandex/ClickHouse/pull/2985) +* The `UUID` data type is supported in The-Alchemist dictionaries. [#2822](https://github.com/yandex/ClickHouse/pull/2822) +* The `visitParamExtractRaw` function works correctly with nested structures. [Winter Zhang](https://github.com/yandex/ClickHouse/pull/2974) +* When the `input_format_skip_unknown_fields` setting is enabled, object fields in `JSONEachRow` format are skipped correctly. [BlahGeek](https://github.com/yandex/ClickHouse/pull/2958) +* For a `CASE` expression with conditions, you can now omit `ELSE`, which is equivalent to `ELSE NULL`. [#2920](https://github.com/yandex/ClickHouse/pull/2920) +* The operation timeout can now be configured when working with ZooKeeper. [urykhy](https://github.com/yandex/ClickHouse/pull/2971) +* You can specify an offset for `LIMIT n, m` as `LIMIT n OFFSET m`. [#2840](https://github.com/yandex/ClickHouse/pull/2840) +* You can use the `SELECT TOP n` syntax as an alternative for `LIMIT`. 
[#2840](https://github.com/yandex/ClickHouse/pull/2840) +* Increased the size of the queue to write to system tables, so the `SystemLog parameter queue is full` error doesn't happen as often. +* The `windowFunnel` aggregate function now supports events that meet multiple conditions. [Amos Bird](https://github.com/yandex/ClickHouse/pull/2801) +* Duplicate columns can be used in a `USING` clause for `JOIN`. [#3006](https://github.com/yandex/ClickHouse/pull/3006) +* `Pretty` formats now have a limit on column alignment by width. Use the `output_format_pretty_max_column_pad_width` setting. If a value is wider, it will still be displayed in its entirety, but the other cells in the table will not be too wide. [#3003](https://github.com/yandex/ClickHouse/pull/3003) +* The `odbc` table function now allows you to specify the database/schema name. [Amos Bird](https://github.com/yandex/ClickHouse/pull/2885) +* Added the ability to use a username specified in the `clickhouse-client` config file. [Vladimir Kozbin](https://github.com/yandex/ClickHouse/pull/2909) +* The `ZooKeeperExceptions` counter has been split into three counters: `ZooKeeperUserExceptions`, `ZooKeeperHardwareExceptions`, and `ZooKeeperOtherExceptions`. +* `ALTER DELETE` queries work for materialized views. +* Added randomization when running the cleanup thread periodically for `ReplicatedMergeTree` tables in order to avoid periodic load spikes when there are a very large number of `ReplicatedMergeTree` tables. +* Support for `ATTACH TABLE ... ON CLUSTER` queries. [#3025](https://github.com/yandex/ClickHouse/pull/3025) + +### Bug fixes: + +* Fixed an issue with `Dictionary` tables (throws the `Size of offsets doesn't match size of column` or `Unknown compression method` exception). This bug appeared in version 18.10.3. [#2913](https://github.com/yandex/ClickHouse/issues/2913) +* Fixed a bug when merging `CollapsingMergeTree` tables if one of the data parts is empty (these parts are formed during merge or `ALTER DELETE` if all data was deleted), and the `vertical` algorithm was used for the merge. [#3049](https://github.com/yandex/ClickHouse/pull/3049) +* Fixed a race condition during `DROP` or `TRUNCATE` for `Memory` tables with a simultaneous `SELECT`, which could lead to server crashes. This bug appeared in version 1.1.54388. [#3038](https://github.com/yandex/ClickHouse/pull/3038) +* Fixed the possibility of data loss when inserting in `Replicated` tables if the `Session is expired` error is returned (data loss can be detected by the `ReplicatedDataLoss` metric). This error occurred in version 1.1.54378. [#2939](https://github.com/yandex/ClickHouse/pull/2939) [#2949](https://github.com/yandex/ClickHouse/pull/2949) [#2964](https://github.com/yandex/ClickHouse/pull/2964) +* Fixed a segfault during `JOIN ... ON`. [#3000](https://github.com/yandex/ClickHouse/pull/3000) +* Fixed the error searching column names when the `WHERE` expression consists entirely of a qualified column name, such as `WHERE table.column`. [#2994](https://github.com/yandex/ClickHouse/pull/2994) +* Fixed the "Not found column" error that occurred when executing distributed queries if a single column consisting of an IN expression with a subquery is requested from a remote server. 
[#3087](https://github.com/yandex/ClickHouse/pull/3087) +* Fixed the `Block structure mismatch in UNION stream: different number of columns` error that occurred for distributed queries if one of the shards is local and the other is not, and optimization of the move to `PREWHERE` is triggered. [#2226](https://github.com/yandex/ClickHouse/pull/2226) [#3037](https://github.com/yandex/ClickHouse/pull/3037) [#3055](https://github.com/yandex/ClickHouse/pull/3055) [#3065](https://github.com/yandex/ClickHouse/pull/3065) [#3073](https://github.com/yandex/ClickHouse/pull/3073) [#3090](https://github.com/yandex/ClickHouse/pull/3090) [#3093](https://github.com/yandex/ClickHouse/pull/3093) +* Fixed the `pointInPolygon` function for certain cases of non-convex polygons. [#2910](https://github.com/yandex/ClickHouse/pull/2910) +* Fixed the incorrect result when comparing `nan` with integers. [#3024](https://github.com/yandex/ClickHouse/pull/3024) +* Fixed an error in the `zlib-ng` library that could lead to segfault in rare cases. [#2854](https://github.com/yandex/ClickHouse/pull/2854) +* Fixed a memory leak when inserting into a table with `AggregateFunction` columns, if the state of the aggregate function is not simple (allocates memory separately), and if a single insertion request results in multiple small blocks. [#3084](https://github.com/yandex/ClickHouse/pull/3084) +* Fixed a race condition when creating and deleting the same `Buffer` or `MergeTree` table simultaneously. +* Fixed the possibility of a segfault when comparing tuples made up of certain non-trivial types, such as tuples. [#2989](https://github.com/yandex/ClickHouse/pull/2989) +* Fixed the possibility of a segfault when running certain `ON CLUSTER` queries. [Winter Zhang](https://github.com/yandex/ClickHouse/pull/2960) +* Fixed an error in the `arrayDistinct` function for `Nullable` array elements. [#2845](https://github.com/yandex/ClickHouse/pull/2845) [#2937](https://github.com/yandex/ClickHouse/pull/2937) +* The `enable_optimize_predicate_expression` option now correctly supports cases with `SELECT *`. [Winter Zhang](https://github.com/yandex/ClickHouse/pull/2929) +* Fixed the segfault when re-initializing the ZooKeeper session. [#2917](https://github.com/yandex/ClickHouse/pull/2917) +* Fixed potential blocking when working with ZooKeeper. +* Fixed incorrect code for adding nested data structures in a `SummingMergeTree`. +* When allocating memory for states of aggregate functions, alignment is correctly taken into account, which makes it possible to use operations that require alignment when implementing states of aggregate functions. [chenxing-xc](https://github.com/yandex/ClickHouse/pull/2808) + +### Security fix: + +* Safe use of ODBC data sources. Interaction with ODBC drivers uses a separate `clickhouse-odbc-bridge` process. Errors in third-party ODBC drivers no longer cause problems with server stability or vulnerabilities. [#2828](https://github.com/yandex/ClickHouse/pull/2828) [#2879](https://github.com/yandex/ClickHouse/pull/2879) [#2886](https://github.com/yandex/ClickHouse/pull/2886) [#2893](https://github.com/yandex/ClickHouse/pull/2893) [#2921](https://github.com/yandex/ClickHouse/pull/2921) +* Fixed incorrect validation of the file path in the `catBoostPool` table function. 
[#2894](https://github.com/yandex/ClickHouse/pull/2894) +* The contents of system tables (`tables`, `databases`, `parts`, `columns`, `parts_columns`, `merges`, `mutations`, `replicas`, and `replication_queue`) are filtered according to the user's configured access to databases (`allow_databases`). [Winter Zhang](https://github.com/yandex/ClickHouse/pull/2856) + +### Backward incompatible changes: + +* In requests with JOIN, the star character expands to a list of columns in all tables, in compliance with the SQL standard. You can restore the old behavior by setting `asterisk_left_columns_only` to 1 on the user configuration level. + +### Build changes: + +* Most integration tests can now be run by commit. +* Code style checks can also be run by commit. +* The `memcpy` implementation is chosen correctly when building on CentOS7/Fedora. [Etienne Champetier](https://github.com/yandex/ClickHouse/pull/2912) +* When using clang to build, some warnings from `-Weverything` have been added, in addition to the regular `-Wall-Wextra -Werror`. [#2957](https://github.com/yandex/ClickHouse/pull/2957) +* Debugging the build uses the `jemalloc` debug option. +* The interface of the library for interacting with ZooKeeper is declared abstract. [#2950](https://github.com/yandex/ClickHouse/pull/2950) + +## ClickHouse release 18.10.3, 2018-08-13 + +### New features: + +* HTTPS can be used for replication. [#2760](https://github.com/yandex/ClickHouse/pull/2760) +* Added the functions `murmurHash2_64`, `murmurHash3_32`, `murmurHash3_64`, and `murmurHash3_128` in addition to the existing `murmurHash2_32`. [#2791](https://github.com/yandex/ClickHouse/pull/2791) +* Support for Nullable types in the ClickHouse ODBC driver (`ODBCDriver2` output format). [#2834](https://github.com/yandex/ClickHouse/pull/2834) +* Support for `UUID` in the key columns. + +### Improvements: + +* Clusters can be removed without restarting the server when they are deleted from the config files. [#2777](https://github.com/yandex/ClickHouse/pull/2777) +* External dictionaries can be removed without restarting the server when they are removed from config files. [#2779](https://github.com/yandex/ClickHouse/pull/2779) +* Added `SETTINGS` support for the `Kafka` table engine. [Alexander Marshalov](https://github.com/yandex/ClickHouse/pull/2781) +* Improvements for the `UUID` data type (not yet complete). [#2618](https://github.com/yandex/ClickHouse/pull/2618) +* Support for empty parts after merges in the `SummingMergeTree`, `CollapsingMergeTree` and `VersionedCollapsingMergeTree` engines. [#2815](https://github.com/yandex/ClickHouse/pull/2815) +* Old records of completed mutations are deleted (`ALTER DELETE`). [#2784](https://github.com/yandex/ClickHouse/pull/2784) +* Added the `system.merge_tree_settings` table. [Kirill Shvakov](https://github.com/yandex/ClickHouse/pull/2841) +* The `system.tables` table now has dependency columns: `dependencies_database` and `dependencies_table`. [Winter Zhang](https://github.com/yandex/ClickHouse/pull/2851) +* Added the `max_partition_size_to_drop` config option. [#2782](https://github.com/yandex/ClickHouse/pull/2782) +* Added the `output_format_json_escape_forward_slashes` option. [Alexander Bocharov](https://github.com/yandex/ClickHouse/pull/2812) +* Added the `max_fetch_partition_retries_count` setting. 
[#2831](https://github.com/yandex/ClickHouse/pull/2831) +* Added the `prefer_localhost_replica` setting for disabling the preference for a local replica and going to a local replica without inter-process interaction. [#2832](https://github.com/yandex/ClickHouse/pull/2832) +* The `quantileExact` aggregate function returns `nan` in the case of aggregation on an empty `Float32` or `Float64` set. [Sundy Li](https://github.com/yandex/ClickHouse/pull/2855) + +### Bug fixes: + +* Removed unnecessary escaping of the connection string parameters for ODBC, which made it impossible to establish a connection. This error occurred in version 18.6.0. +* Fixed the logic for processing `REPLACE PARTITION` commands in the replication queue. If there are two `REPLACE` commands for the same partition, the incorrect logic could cause one of them to remain in the replication queue and not be executed. [#2814](https://github.com/yandex/ClickHouse/pull/2814) +* Fixed a merge bug when all data parts were empty (parts that were formed from a merge or from `ALTER DELETE` if all data was deleted). This bug appeared in version 18.1.0. [#2930](https://github.com/yandex/ClickHouse/pull/2930) +* Fixed an error for concurrent `Set` or `Join`. [Amos Bird](https://github.com/yandex/ClickHouse/pull/2823) +* Fixed the `Block structure mismatch in UNION stream: different number of columns` error that occurred for `UNION ALL` queries inside a sub-query if one of the `SELECT` queries contains duplicate column names. [Winter Zhang](https://github.com/yandex/ClickHouse/pull/2094) +* Fixed a memory leak if an exception occurred when connecting to a MySQL server. +* Fixed incorrect clickhouse-client response code in case of a request error. +* Fixed incorrect behavior of materialized views containing DISTINCT. [#2795](https://github.com/yandex/ClickHouse/issues/2795) + +### Backward incompatible changes + +* Removed support for CHECK TABLE queries for Distributed tables. + +### Build changes: + +* The allocator has been replaced: `jemalloc` is now used instead of `tcmalloc`. In some scenarios, this increases speed up to 20%. However, there are queries that have slowed by up to 20%. Memory consumption has been reduced by approximately 10% in some scenarios, with improved stability. With highly competitive loads, CPU usage in userspace and in system shows just a slight increase. [#2773](https://github.com/yandex/ClickHouse/pull/2773) +* Use of libressl from a submodule. [#1983](https://github.com/yandex/ClickHouse/pull/1983) [#2807](https://github.com/yandex/ClickHouse/pull/2807) +* Use of unixodbc from a submodule. [#2789](https://github.com/yandex/ClickHouse/pull/2789) +* Use of mariadb-connector-c from a submodule. [#2785](https://github.com/yandex/ClickHouse/pull/2785) +* Added functional test files to the repository that depend on the availability of test data (for the time being, without the test data itself). + +## ClickHouse release 18.6.0, 2018-08-02 + +### New features: + +* Added support for ON expressions for the JOIN ON syntax: +`JOIN ON Expr([table.]column ...) = Expr([table.]column, ...) [AND Expr([table.]column, ...) = Expr([table.]column, ...) ...]` +The expression must be a chain of equalities joined by the AND operator. Each side of the equality can be an arbitrary expression over the columns of one of the tables. The use of fully qualified column names is supported (`table.name`, `database.table.name`, `table_alias.name`, `subquery_alias.name`) for the right table. 
[#2742](https://github.com/yandex/ClickHouse/pull/2742)
+* HTTPS can be enabled for replication. [#2760](https://github.com/yandex/ClickHouse/pull/2760)
+
+### Improvements:
+
+* The server passes the patch component of its version to the client. Data about the patch version component is in `system.processes` and `query_log`. [#2646](https://github.com/yandex/ClickHouse/pull/2646)
+
+## ClickHouse release 18.5.1, 2018-07-31
+
+### New features:
+
+* Added the hash function `murmurHash2_32` [#2756](https://github.com/yandex/ClickHouse/pull/2756).
+
+### Improvements:
+
+* Now you can use the `from_env` [#2741](https://github.com/yandex/ClickHouse/pull/2741) attribute to set values in config files from environment variables.
+* Added case-insensitive versions of the `coalesce`, `ifNull`, and `nullIf` functions [#2752](https://github.com/yandex/ClickHouse/pull/2752).
+
+### Bug fixes:
+
+* Fixed a possible bug when starting a replica [#2759](https://github.com/yandex/ClickHouse/pull/2759).
+
+## ClickHouse release 18.4.0, 2018-07-28
+
+### New features:
+
+* Added system tables: `formats`, `data_type_families`, `aggregate_function_combinators`, `table_functions`, `table_engines`, `collations` [#2721](https://github.com/yandex/ClickHouse/pull/2721).
+* Added the ability to use a table function instead of a table as an argument of a `remote` or `cluster` table function [#2708](https://github.com/yandex/ClickHouse/pull/2708).
+* Support for `HTTP Basic` authentication in the replication protocol [#2727](https://github.com/yandex/ClickHouse/pull/2727).
+* The `has` function now allows searching for a numeric value in an array of `Enum` values [Maxim Khrisanfov](https://github.com/yandex/ClickHouse/pull/2699).
+* Support for adding arbitrary message separators when reading from `Kafka` [Amos Bird](https://github.com/yandex/ClickHouse/pull/2701).
+
+### Improvements:
+
+* The `ALTER TABLE t DELETE WHERE` query does not rewrite data parts that were not affected by the WHERE condition [#2694](https://github.com/yandex/ClickHouse/pull/2694).
+* The `use_minimalistic_checksums_in_zookeeper` option for `ReplicatedMergeTree` tables is enabled by default. This setting was added in version 1.1.54378, 2018-04-16. Versions that are older than 1.1.54378 can no longer be installed.
+* Support for running `KILL` and `OPTIMIZE` queries that specify `ON CLUSTER` [Winter Zhang](https://github.com/yandex/ClickHouse/pull/2689).
+
+### Bug fixes:
+
+* Fixed the error `Column ... is not under an aggregate function and not in GROUP BY` for aggregation with an IN expression. This bug appeared in version 18.1.0. ([bbdd780b](https://github.com/yandex/ClickHouse/commit/bbdd780be0be06a0f336775941cdd536878dd2c2))
+* Fixed a bug in the `windowFunnel` aggregate function [Winter Zhang](https://github.com/yandex/ClickHouse/pull/2735).
+* Fixed a bug in the `anyHeavy` aggregate function ([a2101df2](https://github.com/yandex/ClickHouse/commit/a2101df25a6a0fba99aa71f8793d762af2b801ee))
+* Fixed server crash when using the `countArray()` aggregate function.
+
+### Backward incompatible changes:
+
+* Parameters for the `Kafka` engine were changed from `Kafka(kafka_broker_list, kafka_topic_list, kafka_group_name, kafka_format[, kafka_schema, kafka_num_consumers])` to `Kafka(kafka_broker_list, kafka_topic_list, kafka_group_name, kafka_format[, kafka_row_delimiter, kafka_schema, kafka_num_consumers])`.
If your tables use `kafka_schema` or `kafka_num_consumers` parameters, you have to manually edit the metadata files `path/metadata/database/table.sql` and add `kafka_row_delimiter` parameter with `''` value. + +## ClickHouse release 18.1.0, 2018-07-23 + +### New features: + +* Support for the `ALTER TABLE t DELETE WHERE` query for non-replicated MergeTree tables ([#2634](https://github.com/yandex/ClickHouse/pull/2634)). +* Support for arbitrary types for the `uniq*` family of aggregate functions ([#2010](https://github.com/yandex/ClickHouse/issues/2010)). +* Support for arbitrary types in comparison operators ([#2026](https://github.com/yandex/ClickHouse/issues/2026)). +* The `users.xml` file allows setting a subnet mask in the format `10.0.0.1/255.255.255.0`. This is necessary for using masks for IPv6 networks with zeros in the middle ([#2637](https://github.com/yandex/ClickHouse/pull/2637)). +* Added the `arrayDistinct` function ([#2670](https://github.com/yandex/ClickHouse/pull/2670)). +* The SummingMergeTree engine can now work with AggregateFunction type columns ([Constantin S. Pan](https://github.com/yandex/ClickHouse/pull/2566)). + +### Improvements: + +* Changed the numbering scheme for release versions. Now the first part contains the year of release (A.D., Moscow timezone, minus 2000), the second part contains the number for major changes (increases for most releases), and the third part is the patch version. Releases are still backwards compatible, unless otherwise stated in the changelog. +* Faster conversions of floating-point numbers to a string ([Amos Bird](https://github.com/yandex/ClickHouse/pull/2664)). +* If some rows were skipped during an insert due to parsing errors (this is possible with the `input_allow_errors_num` and `input_allow_errors_ratio` settings enabled), the number of skipped rows is now written to the server log ([Leonardo Cecchi](https://github.com/yandex/ClickHouse/pull/2669)). + +### Bug fixes: + +* Fixed the TRUNCATE command for temporary tables ([Amos Bird](https://github.com/yandex/ClickHouse/pull/2624)). +* Fixed a rare deadlock in the ZooKeeper client library that occurred when there was a network error while reading the response ([c315200](https://github.com/yandex/ClickHouse/commit/c315200e64b87e44bdf740707fc857d1fdf7e947)). +* Fixed an error during a CAST to Nullable types ([#1322](https://github.com/yandex/ClickHouse/issues/1322)). +* Fixed the incorrect result of the `maxIntersection()` function when the boundaries of intervals coincided ([Michael Furmur](https://github.com/yandex/ClickHouse/pull/2657)). +* Fixed incorrect transformation of the OR expression chain in a function argument ([chenxing-xc](https://github.com/yandex/ClickHouse/pull/2663)). +* Fixed performance degradation for queries containing `IN (subquery)` expressions inside another subquery ([#2571](https://github.com/yandex/ClickHouse/issues/2571)). +* Fixed incompatibility between servers with different versions in distributed queries that use a `CAST` function that isn't in uppercase letters ([fe8c4d6](https://github.com/yandex/ClickHouse/commit/fe8c4d64e434cacd4ceef34faa9005129f2190a5)). +* Added missing quoting of identifiers for queries to an external DBMS ([#2635](https://github.com/yandex/ClickHouse/issues/2635)). + +### Backward incompatible changes: + +* Converting a string containing the number zero to DateTime does not work. Example: `SELECT toDateTime('0')`. 
This is also the reason that `DateTime DEFAULT '0'` does not work in tables, as well as `0` in dictionaries. Solution: replace `0` with `0000-00-00 00:00:00`. + +## ClickHouse release 1.1.54394, 2018-07-12 + +### New features: + +* Added the `histogram` aggregate function ([Mikhail Surin](https://github.com/yandex/ClickHouse/pull/2521)). +* Now `OPTIMIZE TABLE ... FINAL` can be used without specifying partitions for `ReplicatedMergeTree` ([Amos Bird](https://github.com/yandex/ClickHouse/pull/2600)). + +### Bug fixes: + +* Fixed a problem with a very small timeout for sockets (one second) for reading and writing when sending and downloading replicated data, which made it impossible to download larger parts if there is a load on the network or disk (it resulted in cyclical attempts to download parts). This error occurred in version 1.1.54388. +* Fixed issues when using chroot in ZooKeeper if you inserted duplicate data blocks in the table. +* The `has` function now works correctly for an array with Nullable elements ([#2115](https://github.com/yandex/ClickHouse/issues/2115)). +* The `system.tables` table now works correctly when used in distributed queries. The `metadata_modification_time` and `engine_full` columns are now non-virtual. Fixed an error that occurred if only these columns were requested from the table. +* Fixed how an empty `TinyLog` table works after inserting an empty data block ([#2563](https://github.com/yandex/ClickHouse/issues/2563)). +* The `system.zookeeper` table works if the value of the node in ZooKeeper is NULL. + +## ClickHouse release 1.1.54390, 2018-07-06 + +### New features: + +* Queries can be sent in `multipart/form-data` format (in the `query` field), which is useful if external data is also sent for query processing ([Olga Hvostikova](https://github.com/yandex/ClickHouse/pull/2490)). +* Added the ability to enable or disable processing single or double quotes when reading data in CSV format. You can configure this in the `format_csv_allow_single_quotes` and `format_csv_allow_double_quotes` settings ([Amos Bird](https://github.com/yandex/ClickHouse/pull/2574)). +* Now `OPTIMIZE TABLE ... FINAL` can be used without specifying the partition for non-replicated variants of `MergeTree` ([Amos Bird](https://github.com/yandex/ClickHouse/pull/2599)). + +### Improvements: + +* Improved performance, reduced memory consumption, and correct memory consumption tracking with use of the IN operator when a table index could be used ([#2584](https://github.com/yandex/ClickHouse/pull/2584)). +* Removed redundant checking of checksums when adding a data part. This is important when there are a large number of replicas, because in these cases the total number of checks was equal to N^2. +* Added support for `Array(Tuple(...))` arguments for the `arrayEnumerateUniq` function ([#2573](https://github.com/yandex/ClickHouse/pull/2573)). +* Added `Nullable` support for the `runningDifference` function ([#2594](https://github.com/yandex/ClickHouse/pull/2594)). +* Improved query analysis performance when there is a very large number of expressions ([#2572](https://github.com/yandex/ClickHouse/pull/2572)). +* Faster selection of data parts for merging in `ReplicatedMergeTree` tables. Faster recovery of the ZooKeeper session ([#2597](https://github.com/yandex/ClickHouse/pull/2597)). 
+* The `format_version.txt` file for `MergeTree` tables is re-created if it is missing, which makes sense if ClickHouse is launched after copying the directory structure without files ([Ciprian Hacman](https://github.com/yandex/ClickHouse/pull/2593)). + +### Bug fixes: + +* Fixed a bug when working with ZooKeeper that could make it impossible to recover the session and readonly states of tables before restarting the server. +* Fixed a bug when working with ZooKeeper that could result in old nodes not being deleted if the session is interrupted. +* Fixed an error in the `quantileTDigest` function for Float arguments (this bug was introduced in version 1.1.54388) ([Mikhail Surin](https://github.com/yandex/ClickHouse/pull/2553)). +* Fixed a bug in the index for MergeTree tables if the primary key column is located inside the function for converting types between signed and unsigned integers of the same size ([#2603](https://github.com/yandex/ClickHouse/pull/2603)). +* Fixed segfault if `macros` are used but they aren't in the config file ([#2570](https://github.com/yandex/ClickHouse/pull/2570)). +* Fixed switching to the default database when reconnecting the client ([#2583](https://github.com/yandex/ClickHouse/pull/2583)). +* Fixed a bug that occurred when the `use_index_for_in_with_subqueries` setting was disabled. + +### Security fix: + +* Sending files is no longer possible when connected to MySQL (`LOAD DATA LOCAL INFILE`). + +## ClickHouse release 1.1.54388, 2018-06-28 + +### New features: + +* Support for the `ALTER TABLE t DELETE WHERE` query for replicated tables. Added the `system.mutations` table to track progress of this type of queries. +* Support for the `ALTER TABLE t [REPLACE|ATTACH] PARTITION` query for \*MergeTree tables. +* Support for the `TRUNCATE TABLE` query ([Winter Zhang](https://github.com/yandex/ClickHouse/pull/2260)) +* Several new `SYSTEM` queries for replicated tables (`RESTART REPLICAS`, `SYNC REPLICA`, `[STOP|START] [MERGES|FETCHES|SENDS REPLICATED|REPLICATION QUEUES]`). +* Added the ability to write to a table with the MySQL engine and the corresponding table function ([sundy-li](https://github.com/yandex/ClickHouse/pull/2294)). +* Added the `url()` table function and the `URL` table engine ([Alexander Sapin](https://github.com/yandex/ClickHouse/pull/2501)). +* Added the `windowFunnel` aggregate function ([sundy-li](https://github.com/yandex/ClickHouse/pull/2352)). +* New `startsWith` and `endsWith` functions for strings ([Vadim Plakhtinsky](https://github.com/yandex/ClickHouse/pull/2429)). +* The `numbers()` table function now allows you to specify the offset ([Winter Zhang](https://github.com/yandex/ClickHouse/pull/2535)). +* The password to `clickhouse-client` can be entered interactively. +* Server logs can now be sent to syslog ([Alexander Krasheninnikov](https://github.com/yandex/ClickHouse/pull/2459)). +* Support for logging in dictionaries with a shared library source ([Alexander Sapin](https://github.com/yandex/ClickHouse/pull/2472)). +* Support for custom CSV delimiters ([Ivan Zhukov](https://github.com/yandex/ClickHouse/pull/2263)) +* Added the `date_time_input_format` setting. If you switch this setting to `'best_effort'`, DateTime values will be read in a wide range of formats. +* Added the `clickhouse-obfuscator` utility for data obfuscation. Usage example: publishing data used in performance tests. 
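+
+A quick, hedged illustration of a few of the new features listed above; the `hits` table and its `EventDate` and `URL` columns are hypothetical names used only for this sketch:
+
+```sql
+-- New mutation syntax; progress can be followed in the new system.mutations table.
+ALTER TABLE hits DELETE WHERE EventDate < '2018-01-01';
+SELECT * FROM system.mutations WHERE table = 'hits';
+
+-- New startsWith/endsWith string functions and the offset argument of numbers().
+SELECT startsWith(URL, 'https://'), endsWith(URL, '.html') FROM hits LIMIT 10;
+SELECT number FROM numbers(100, 10);
+```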
+ +### Experimental features: + +* Added the ability to calculate `and` arguments only where they are needed ([Anastasia Tsarkova](https://github.com/yandex/ClickHouse/pull/2272)) +* JIT compilation to native code is now available for some expressions ([pyos](https://github.com/yandex/ClickHouse/pull/2277)). + +### Bug fixes: + +* Duplicates no longer appear for a query with `DISTINCT` and `ORDER BY`. +* Queries with `ARRAY JOIN` and `arrayFilter` no longer return an incorrect result. +* Fixed an error when reading an array column from a Nested structure ([#2066](https://github.com/yandex/ClickHouse/issues/2066)). +* Fixed an error when analyzing queries with a HAVING clause like `HAVING tuple IN (...)`. +* Fixed an error when analyzing queries with recursive aliases. +* Fixed an error when reading from ReplacingMergeTree with a condition in PREWHERE that filters all rows ([#2525](https://github.com/yandex/ClickHouse/issues/2525)). +* User profile settings were not applied when using sessions in the HTTP interface. +* Fixed how settings are applied from the command line parameters in clickhouse-local. +* The ZooKeeper client library now uses the session timeout received from the server. +* Fixed a bug in the ZooKeeper client library when the client waited for the server response longer than the timeout. +* Fixed pruning of parts for queries with conditions on partition key columns ([#2342](https://github.com/yandex/ClickHouse/issues/2342)). +* Merges are now possible after `CLEAR COLUMN IN PARTITION` ([#2315](https://github.com/yandex/ClickHouse/issues/2315)). +* Type mapping in the ODBC table function has been fixed ([sundy-li](https://github.com/yandex/ClickHouse/pull/2268)). +* Type comparisons have been fixed for `DateTime` with and without the time zone ([Alexander Bocharov](https://github.com/yandex/ClickHouse/pull/2400)). +* Fixed syntactic parsing and formatting of the `CAST` operator. +* Fixed insertion into a materialized view for the Distributed table engine ([Babacar Diassé](https://github.com/yandex/ClickHouse/pull/2411)). +* Fixed a race condition when writing data from the `Kafka` engine to materialized views ([Yangkuan Liu](https://github.com/yandex/ClickHouse/pull/2448)). +* Fixed SSRF in the remote() table function. +* Fixed exit behavior of `clickhouse-client` in multiline mode ([#2510](https://github.com/yandex/ClickHouse/issues/2510)). + +### Improvements: + +* Background tasks in replicated tables are now performed in a thread pool instead of in separate threads ([Silviu Caragea](https://github.com/yandex/ClickHouse/pull/1722)). +* Improved LZ4 compression performance. +* Faster analysis for queries with a large number of JOINs and sub-queries. +* The DNS cache is now updated automatically when there are too many network errors. +* Table inserts no longer occur if the insert into one of the materialized views is not possible because it has too many parts. +* Corrected the discrepancy in the event counters `Query`, `SelectQuery`, and `InsertQuery`. +* Expressions like `tuple IN (SELECT tuple)` are allowed if the tuple types match. +* A server with replicated tables can start even if you haven't configured ZooKeeper. +* When calculating the number of available CPU cores, limits on cgroups are now taken into account ([Atri Sharma](https://github.com/yandex/ClickHouse/pull/2325)). +* Added chown for config directories in the systemd config file ([Mikhail Shiryaev](https://github.com/yandex/ClickHouse/pull/2421)). 
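+
+For the event counters mentioned in the improvements above, the current values can be read from the `system.events` table; a minimal query, shown only as a pointer:
+
+```sql
+SELECT event, value
+FROM system.events
+WHERE event IN ('Query', 'SelectQuery', 'InsertQuery');
+```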
+
+### Build changes:
+
+* The gcc8 compiler can be used for builds.
+* Added the ability to build llvm from submodule.
+* The version of the librdkafka library has been updated to v0.11.4.
+* Added the ability to use the system libcpuid library. The library version has been updated to 0.4.0.
+* Fixed the build using the vectorclass library ([Babacar Diassé](https://github.com/yandex/ClickHouse/pull/2274)).
+* CMake now generates files for ninja by default (like when using `-G Ninja`).
+* Added the ability to use the libtinfo library instead of libtermcap ([Georgy Kondratiev](https://github.com/yandex/ClickHouse/pull/2519)).
+* Fixed a header file conflict in Fedora Rawhide ([#2520](https://github.com/yandex/ClickHouse/issues/2520)).
+
+### Backward incompatible changes:
+
+* Removed escaping in `Vertical` and `Pretty*` formats and deleted the `VerticalRaw` format.
+* If servers with version 1.1.54388 (or newer) and servers with an older version are used simultaneously in a distributed query and the query has the `cast(x, 'Type')` expression without the `AS` keyword and doesn't have the word `cast` in uppercase, an exception will be thrown with a message like `Not found column cast(0, 'UInt8') in block`. Solution: Update the server on the entire cluster.
+
+## ClickHouse release 1.1.54385, 2018-06-01
+
+### Bug fixes:
+
+* Fixed an error that in some cases caused ZooKeeper operations to block.
+
+## ClickHouse release 1.1.54383, 2018-05-22
+
+### Bug fixes:
+
+* Fixed a slowdown of the replication queue if a table has many replicas.
+
+## ClickHouse release 1.1.54381, 2018-05-14
+
+### Bug fixes:
+
+* Fixed a leak of nodes in ZooKeeper when ClickHouse loses the connection to the ZooKeeper server.
+
+## ClickHouse release 1.1.54380, 2018-04-21
+
+### New features:
+
+* Added the table function `file(path, format, structure)`. An example reading bytes from `/dev/urandom`: `ln -s /dev/urandom /var/lib/clickhouse/user_files/random`, then `clickhouse-client -q "SELECT * FROM file('random', 'RowBinary', 'd UInt8') LIMIT 10"`.
+
+### Improvements:
+
+* Subqueries can be wrapped in `()` brackets to enhance query readability. For example: `(SELECT 1) UNION ALL (SELECT 1)`.
+* Simple `SELECT` queries from the `system.processes` table are not included in the `max_concurrent_queries` limit.
+
+### Bug fixes:
+
+* Fixed incorrect behavior of the `IN` operator when selecting from a `MATERIALIZED VIEW`.
+* Fixed incorrect filtering by partition index in expressions like `partition_key_column IN (...)`.
+* Fixed inability to execute an `OPTIMIZE` query on a non-leader replica if `RENAME` was performed on the table.
+* Fixed the authorization error when executing `OPTIMIZE` or `ALTER` queries on a non-leader replica.
+* Fixed freezing of `KILL QUERY`.
+* Fixed an error in the ZooKeeper client library which led to loss of watches, freezing of the distributed DDL queue, and slowdowns in the replication queue if a non-empty `chroot` prefix is used in the ZooKeeper configuration.
+
+### Backward incompatible changes:
+
+* Removed support for expressions like `(a, b) IN (SELECT (a, b))` (you can use the equivalent expression `(a, b) IN (SELECT a, b)`). In previous releases, these expressions led to undetermined `WHERE` filtering or caused errors.
+
+## ClickHouse release 1.1.54378, 2018-04-16
+
+### New features:
+
+* Logging level can be changed without restarting the server.
+* Added the `SHOW CREATE DATABASE` query.
+* The `query_id` can be passed to `clickhouse-client` (elBroom).
+* New setting: `max_network_bandwidth_for_all_users`.
+* Added support for `ALTER TABLE ... PARTITION ...` for `MATERIALIZED VIEW`.
+* Added information about the size of data parts in uncompressed form in the system table.
+* Server-to-server encryption support for distributed tables (`<secure>1</secure>` in the replica config in `<remote_servers>`).
+* Configuration of the table level for the `ReplicatedMergeTree` family in order to minimize the amount of data stored in ZooKeeper: `use_minimalistic_checksums_in_zookeeper = 1`.
+* Configuration of the `clickhouse-client` prompt. By default, server names are now output to the prompt. The server's display name can be changed. It's also sent in the `X-ClickHouse-Display-Name` HTTP header (Kirill Shvakov).
+* Multiple comma-separated `topics` can be specified for the `Kafka` engine (Tobias Adamson).
+* When a query is stopped by `KILL QUERY` or `replace_running_query`, the client receives the `Query was cancelled` exception instead of an incomplete result.
+
+### Improvements:
+
+* `ALTER TABLE ... DROP/DETACH PARTITION` queries are run at the front of the replication queue.
+* `SELECT ... FINAL` and `OPTIMIZE ... FINAL` can be used even when the table has a single data part.
+* A `query_log` table is recreated on the fly if it was deleted manually (Kirill Shvakov).
+* The `lengthUTF8` function runs faster (zhang2014).
+* Improved performance of synchronous inserts in `Distributed` tables (`insert_distributed_sync = 1`) when there is a very large number of shards.
+* The server accepts the `send_timeout` and `receive_timeout` settings from the client and applies them when connecting to the client (they are applied in reverse order: the server socket's `send_timeout` is set to the `receive_timeout` value received from the client, and vice versa).
+* More robust crash recovery for asynchronous insertion into `Distributed` tables.
+* The return type of the `countEqual` function changed from `UInt32` to `UInt64` (谢磊).
+
+### Bug fixes:
+
+* Fixed an error with `IN` when the left side of the expression is `Nullable`.
+* Correct results are now returned when using tuples with `IN` when some of the tuple components are in the table index.
+* The `max_execution_time` limit now works correctly with distributed queries.
+* Fixed errors when calculating the size of composite columns in the `system.columns` table.
+* Fixed an error when creating a temporary table with `CREATE TEMPORARY TABLE IF NOT EXISTS`.
+* Fixed errors in `StorageKafka` (#2075).
+* Fixed server crashes from invalid arguments of certain aggregate functions.
+* Fixed the error that prevented the `DETACH DATABASE` query from stopping background tasks for `ReplicatedMergeTree` tables.
+* The `Too many parts` state is less likely to happen when inserting into aggregated materialized views (#2084).
+* Corrected recursive handling of substitutions in the config if a substitution must be followed by another substitution on the same level.
+* Corrected the syntax in the metadata file when creating a `VIEW` that uses a query with `UNION ALL`.
+* `SummingMergeTree` now works correctly for summation of nested data structures with a composite key.
+* Fixed the possibility of a race condition when choosing the leader for `ReplicatedMergeTree` tables.
+
+### Build changes:
+
+* The build supports `ninja` instead of `make` and uses `ninja` by default for building releases.
+* Renamed packages: `clickhouse-server-base` to `clickhouse-common-static`; `clickhouse-server-common` to `clickhouse-server`; `clickhouse-common-dbg` to `clickhouse-common-static-dbg`.
To install, use the `clickhouse-server` and `clickhouse-client` packages. Packages with the old names are still loaded into the repositories for backward compatibility.
+
+### Backward incompatible changes:
+
+* Removed the special interpretation of an IN expression if an array is specified on the left side. Previously, the expression `arr IN (set)` was interpreted as "at least one `arr` element belongs to the `set`". To get the same behavior in the new version, write `arrayExists(x -> x IN (set), arr)`.
+* Disabled the incorrect use of the socket option `SO_REUSEPORT`, which was incorrectly enabled by default in the Poco library. Note that on Linux there is no longer any reason to simultaneously specify the addresses `::` and `0.0.0.0` for listen – use just `::`, which allows listening to the connection both over IPv4 and IPv6 (with the default kernel config settings). You can also revert to the behavior from previous versions by specifying `<listen_reuse_port>1</listen_reuse_port>` in the config.
+
+## ClickHouse release 1.1.54370, 2018-03-16
+
+### New features:
+
+* Added the `system.macros` table and automatic updating of macros when the config file is changed.
+* Added the `SYSTEM RELOAD CONFIG` query.
+* Added the `maxIntersections(left_col, right_col)` aggregate function, which returns the maximum number of simultaneously intersecting intervals `[left; right]`. The `maxIntersectionsPosition(left, right)` function returns the beginning of the "maximum" interval. ([Michael Furmur](https://github.com/yandex/ClickHouse/pull/2012)).
+
+### Improvements:
+
+* When inserting data in a `Replicated` table, fewer requests are made to `ZooKeeper` (and most of the user-level errors have disappeared from the `ZooKeeper` log).
+* Added the ability to create aliases for data sets. Example: `WITH (1, 2, 3) AS set SELECT number IN set FROM system.numbers LIMIT 10`.
+
+### Bug fixes:
+
+* Fixed the `Illegal PREWHERE` error when reading from Merge tables over `Distributed` tables.
+* Added fixes that allow you to start clickhouse-server in IPv4-only Docker containers.
+* Fixed a race condition when reading from the `system.parts_columns` table.
+* Removed double buffering during a synchronous insert to a `Distributed` table, which could have caused the connection to time out.
+* Fixed a bug that caused excessively long waits for an unavailable replica before beginning a `SELECT` query.
+* Fixed incorrect dates in the `system.parts` table.
+* Fixed a bug that made it impossible to insert data in a `Replicated` table if `chroot` was non-empty in the configuration of the `ZooKeeper` cluster.
+* Fixed the vertical merging algorithm for an empty `ORDER BY` table.
+* Restored the ability to use dictionaries in queries to remote tables, even if these dictionaries are not present on the requestor server. This functionality was lost in release 1.1.54362.
+* Restored the behavior for queries like `SELECT * FROM remote('server2', default.table) WHERE col IN (SELECT col2 FROM default.table)` when the right side of the `IN` should use a remote `default.table` instead of a local one. This behavior was broken in version 1.1.54358.
+* Removed extraneous error-level logging of `Not found column ... in block`.
+
+## ClickHouse release 1.1.54362, 2018-03-11
+
+### New features:
+
+* Aggregation without `GROUP BY` for an empty set (such as `SELECT count(*) FROM table WHERE 0`) now returns a result with one row with null values for aggregate functions, in compliance with the SQL standard.
To restore the old behavior (return an empty result), set `empty_result_for_aggregation_by_empty_set` to 1. +* Added type conversion for `UNION ALL`. Different alias names are allowed in `SELECT` positions in `UNION ALL`, in compliance with the SQL standard. +* Arbitrary expressions are supported in `LIMIT BY` clauses. Previously, it was only possible to use columns resulting from `SELECT`. +* An index of `MergeTree` tables is used when `IN` is applied to a tuple of expressions from the columns of the primary key. Example: `WHERE (UserID, EventDate) IN ((123, '2000-01-01'), ...)` (Anastasiya Tsarkova). +* Added the `clickhouse-copier` tool for copying between clusters and resharding data (beta). +* Added consistent hashing functions: `yandexConsistentHash`, `jumpConsistentHash`, `sumburConsistentHash`. They can be used as a sharding key in order to reduce the amount of network traffic during subsequent reshardings. +* Added functions: `arrayAny`, `arrayAll`, `hasAny`, `hasAll`, `arrayIntersect`, `arrayResize`. +* Added the `arrayCumSum` function (Javi Santana). +* Added the `parseDateTimeBestEffort`, `parseDateTimeBestEffortOrZero`, and `parseDateTimeBestEffortOrNull` functions to read the DateTime from a string containing text in a wide variety of possible formats. +* Data can be partially reloaded from external dictionaries during updating (load just the records in which the value of the specified field greater than in the previous download) (Arsen Hakobyan). +* Added the `cluster` table function. Example: `cluster(cluster_name, db, table)`. The `remote` table function can accept the cluster name as the first argument, if it is specified as an identifier. +* The `remote` and `cluster` table functions can be used in `INSERT` requests. +* Added the `create_table_query` and `engine_full` virtual columns to the `system.tables`table . The `metadata_modification_time` column is virtual. +* Added the `data_path` and `metadata_path` columns to `system.tables`and` system.databases` tables, and added the `path` column to the `system.parts` and `system.parts_columns` tables. +* Added additional information about merges in the `system.part_log` table. +* An arbitrary partitioning key can be used for the `system.query_log` table (Kirill Shvakov). +* The `SHOW TABLES` query now also shows temporary tables. Added temporary tables and the `is_temporary` column to `system.tables` (zhang2014). +* Added `DROP TEMPORARY TABLE` and `EXISTS TEMPORARY TABLE` queries (zhang2014). +* Support for `SHOW CREATE TABLE` for temporary tables (zhang2014). +* Added the `system_profile` configuration parameter for the settings used by internal processes. +* Support for loading `object_id` as an attribute in `MongoDB` dictionaries (Pavel Litvinenko). +* Reading `null` as the default value when loading data for an external dictionary with the `MongoDB` source (Pavel Litvinenko). +* Reading `DateTime` values in the `Values` format from a Unix timestamp without single quotes. +* Failover is supported in `remote` table functions for cases when some of the replicas are missing the requested table. +* Configuration settings can be overridden in the command line when you run `clickhouse-server`. Example: `clickhouse-server -- --logger.level=information`. +* Implemented the `empty` function from a `FixedString` argument: the function returns 1 if the string consists entirely of null bytes (zhang2014). 
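+
+  A minimal illustration of the point above (the values are invented for this example; `CAST` to `FixedString(4)` pads a shorter string with null bytes):
+
+  ```sql
+  -- A fully zero-padded value consists only of null bytes, so empty() returns 1;
+  -- any non-null byte makes the result 0.
+  SELECT
+      empty(CAST(''   AS FixedString(4))) AS all_null_bytes,  -- 1
+      empty(CAST('ab' AS FixedString(4))) AS has_data          -- 0
+  ```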
+* Added the `listen_try`configuration parameter for listening to at least one of the listen addresses without quitting, if some of the addresses can't be listened to (useful for systems with disabled support for IPv4 or IPv6). +* Added the `VersionedCollapsingMergeTree` table engine. +* Support for rows and arbitrary numeric types for the `library` dictionary source. +* `MergeTree` tables can be used without a primary key (you need to specify `ORDER BY tuple()`). +* A `Nullable` type can be `CAST` to a non-`Nullable` type if the argument is not `NULL`. +* `RENAME TABLE` can be performed for `VIEW`. +* Added the `throwIf` function. +* Added the `odbc_default_field_size` option, which allows you to extend the maximum size of the value loaded from an ODBC source (by default, it is 1024). +* The `system.processes` table and `SHOW PROCESSLIST` now have the `is_cancelled` and `peak_memory_usage` columns. + +### Improvements: + +* Limits and quotas on the result are no longer applied to intermediate data for `INSERT SELECT` queries or for `SELECT` subqueries. +* Fewer false triggers of `force_restore_data` when checking the status of `Replicated` tables when the server starts. +* Added the `allow_distributed_ddl` option. +* Nondeterministic functions are not allowed in expressions for `MergeTree` table keys. +* Files with substitutions from `config.d` directories are loaded in alphabetical order. +* Improved performance of the `arrayElement` function in the case of a constant multidimensional array with an empty array as one of the elements. Example: `[[1], []][x]`. +* The server starts faster now when using configuration files with very large substitutions (for instance, very large lists of IP networks). +* When running a query, table valued functions run once. Previously, `remote` and `mysql` table valued functions performed the same query twice to retrieve the table structure from a remote server. +* The `MkDocs` documentation generator is used. +* When you try to delete a table column that `DEFAULT`/`MATERIALIZED` expressions of other columns depend on, an exception is thrown (zhang2014). +* Added the ability to parse an empty line in text formats as the number 0 for `Float` data types. This feature was previously available but was lost in release 1.1.54342. +* `Enum` values can be used in `min`, `max`, `sum` and some other functions. In these cases, it uses the corresponding numeric values. This feature was previously available but was lost in the release 1.1.54337. +* Added `max_expanded_ast_elements` to restrict the size of the AST after recursively expanding aliases. + +### Bug fixes: + +* Fixed cases when unnecessary columns were removed from subqueries in error, or not removed from subqueries containing `UNION ALL`. +* Fixed a bug in merges for `ReplacingMergeTree` tables. +* Fixed synchronous insertions in `Distributed` tables (`insert_distributed_sync = 1`). +* Fixed segfault for certain uses of `FULL` and `RIGHT JOIN` with duplicate columns in subqueries. +* Fixed segfault for certain uses of `replace_running_query` and `KILL QUERY`. +* Fixed the order of the `source` and `last_exception` columns in the `system.dictionaries` table. +* Fixed a bug when the `DROP DATABASE` query did not delete the file with metadata. +* Fixed the `DROP DATABASE` query for `Dictionary` databases. +* Fixed the low precision of `uniqHLL12` and `uniqCombined` functions for cardinalities greater than 100 million items (Alex Bocharov). 
+* Fixed the calculation of implicit default values when necessary to simultaneously calculate default explicit expressions in `INSERT` queries (zhang2014). +* Fixed a rare case when a query to a `MergeTree` table couldn't finish (chenxing-xc). +* Fixed a crash that occurred when running a `CHECK` query for `Distributed` tables if all shards are local (chenxing.xc). +* Fixed a slight performance regression with functions that use regular expressions. +* Fixed a performance regression when creating multidimensional arrays from complex expressions. +* Fixed a bug that could cause an extra `FORMAT` section to appear in an `.sql` file with metadata. +* Fixed a bug that caused the `max_table_size_to_drop` limit to apply when trying to delete a `MATERIALIZED VIEW` looking at an explicitly specified table. +* Fixed incompatibility with old clients (old clients were sometimes sent data with the `DateTime('timezone')` type, which they do not understand). +* Fixed a bug when reading `Nested` column elements of structures that were added using `ALTER` but that are empty for the old partitions, when the conditions for these columns moved to `PREWHERE`. +* Fixed a bug when filtering tables by virtual `_table` columns in queries to `Merge` tables. +* Fixed a bug when using `ALIAS` columns in `Distributed` tables. +* Fixed a bug that made dynamic compilation impossible for queries with aggregate functions from the `quantile` family. +* Fixed a race condition in the query execution pipeline that occurred in very rare cases when using `Merge` tables with a large number of tables, and when using `GLOBAL` subqueries. +* Fixed a crash when passing arrays of different sizes to an `arrayReduce` function when using aggregate functions from multiple arguments. +* Prohibited the use of queries with `UNION ALL` in a `MATERIALIZED VIEW`. +* Fixed an error during initialization of the `part_log` system table when the server starts (by default, `part_log` is disabled). + +### Backward incompatible changes: + +* Removed the `distributed_ddl_allow_replicated_alter` option. This behavior is enabled by default. +* Removed the `strict_insert_defaults` setting. If you were using this functionality, write to `clickhouse-feedback@yandex-team.com`. +* Removed the `UnsortedMergeTree` engine. + +## Clickhouse Release 1.1.54343, 2018-02-05 + +* Added macros support for defining cluster names in distributed DDL queries and constructors of Distributed tables: `CREATE TABLE distr ON CLUSTER '{cluster}' (...) ENGINE = Distributed('{cluster}', 'db', 'table')`. +* Now queries like `SELECT ... FROM table WHERE expr IN (subquery)` are processed using the `table` index. +* Improved processing of duplicates when inserting to Replicated tables, so they no longer slow down execution of the replication queue. + +## Clickhouse Release 1.1.54342, 2018-01-22 + +This release contains bug fixes for the previous release 1.1.54337: + +* Fixed a regression in 1.1.54337: if the default user has readonly access, then the server refuses to start up with the message `Cannot create database in readonly mode`. +* Fixed a regression in 1.1.54337: on systems with systemd, logs are always written to syslog regardless of the configuration; the watchdog script still uses init.d. +* Fixed a regression in 1.1.54337: wrong default configuration in the Docker image. +* Fixed nondeterministic behavior of GraphiteMergeTree (you can see it in log messages `Data after merge is not byte-identical to the data on another replicas`). 
+* Fixed a bug that may lead to inconsistent merges after OPTIMIZE query to Replicated tables (you may see it in log messages `Part ... intersects the previous part`). +* Buffer tables now work correctly when MATERIALIZED columns are present in the destination table (by zhang2014). +* Fixed a bug in implementation of NULL. + +## Clickhouse Release 1.1.54337, 2018-01-18 + +### New features: + +* Added support for storage of multi-dimensional arrays and tuples (`Tuple` data type) in tables. +* Support for table functions for `DESCRIBE` and `INSERT` queries. Added support for subqueries in `DESCRIBE`. Examples: `DESC TABLE remote('host', default.hits)`; `DESC TABLE (SELECT 1)`; `INSERT INTO TABLE FUNCTION remote('host', default.hits)`. Support for `INSERT INTO TABLE` in addition to `INSERT INTO`. +* Improved support for time zones. The `DateTime` data type can be annotated with the timezone that is used for parsing and formatting in text formats. Example: `DateTime('Europe/Moscow')`. When timezones are specified in functions for `DateTime` arguments, the return type will track the timezone, and the value will be displayed as expected. +* Added the functions `toTimeZone`, `timeDiff`, `toQuarter`, `toRelativeQuarterNum`. The `toRelativeHour`/`Minute`/`Second` functions can take a value of type `Date` as an argument. The `now` function name is case-sensitive. +* Added the `toStartOfFifteenMinutes` function (Kirill Shvakov). +* Added the `clickhouse format` tool for formatting queries. +* Added the `format_schema_path` configuration parameter (Marek Vavruşa). It is used for specifying a schema in `Cap'n Proto` format. Schema files can be located only in the specified directory. +* Added support for config substitutions (`incl` and `conf.d`) for configuration of external dictionaries and models (Pavel Yakunin). +* Added a column with documentation for the `system.settings` table (Kirill Shvakov). +* Added the `system.parts_columns` table with information about column sizes in each data part of `MergeTree` tables. +* Added the `system.models` table with information about loaded `CatBoost` machine learning models. +* Added the `mysql` and `odbc` table function and corresponding `MySQL` and `ODBC` table engines for accessing remote databases. This functionality is in the beta stage. +* Added the possibility to pass an argument of type `AggregateFunction` for the `groupArray` aggregate function (so you can create an array of states of some aggregate function). +* Removed restrictions on various combinations of aggregate function combinators. For example, you can use `avgForEachIf` as well as `avgIfForEach` aggregate functions, which have different behaviors. +* The `-ForEach` aggregate function combinator is extended for the case of aggregate functions of multiple arguments. +* Added support for aggregate functions of `Nullable` arguments even for cases when the function returns a non-`Nullable` result (added with the contribution of Silviu Caragea). Example: `groupArray`, `groupUniqArray`, `topK`. +* Added the `max_client_network_bandwidth` for `clickhouse-client` (Kirill Shvakov). +* Users with the ` readonly = 2` setting are allowed to work with TEMPORARY tables (CREATE, DROP, INSERT...) (Kirill Shvakov). +* Added support for using multiple consumers with the `Kafka` engine. Extended configuration options for `Kafka` (Marek Vavruša). +* Added the `intExp3` and `intExp4` functions. +* Added the `sumKahan` aggregate function. 
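+
+  A small, hypothetical illustration of `sumKahan` (the data and query are invented for this example): it uses compensated (Kahan) summation, so it accumulates less floating-point rounding error than plain `sum` over many small values.
+
+  ```sql
+  -- Sum one million 0.001 values; the exact answer is 1000.
+  -- sumKahan is expected to stay closer to it than plain sum.
+  SELECT sum(x) AS plain_sum, sumKahan(x) AS kahan_sum
+  FROM (SELECT 0.001 AS x FROM system.numbers LIMIT 1000000)
+  ```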
+* Added the `to*Number*OrNull` functions, where `*Number*` is a numeric type.
+* Added support for `WITH` clauses for an `INSERT SELECT` query (author: zhang2014).
+* Added settings: `http_connection_timeout`, `http_send_timeout`, `http_receive_timeout`. In particular, these settings are used for downloading data parts for replication. Changing these settings allows for faster failover if the network is overloaded.
+* Added support for `ALTER` for tables of type `Null` (Anastasiya Tsarkova).
+* The `reinterpretAsString` function is extended for all data types that are stored contiguously in memory.
+* Added the `--silent` option for the `clickhouse-local` tool. It suppresses printing query execution info to stderr.
+* Added support for reading values of type `Date` from text in a format where the month and/or day of the month is specified using a single digit instead of two digits (Amos Bird).
+
+### Performance optimizations:
+
+* Improved performance of the aggregate functions `min`, `max`, `any`, `anyLast`, `anyHeavy`, `argMin`, and `argMax` for string arguments.
+* Improved performance of the functions `isInfinite`, `isFinite`, `isNaN`, `roundToExp2`.
+* Improved performance of parsing and formatting `Date` and `DateTime` type values in text format.
+* Improved performance and precision of parsing floating point numbers.
+* Lowered memory usage for `JOIN` in the case when the left and right parts have columns with identical names that are not contained in `USING`.
+* Improved performance of the aggregate functions `varSamp`, `varPop`, `stddevSamp`, `stddevPop`, `covarSamp`, `covarPop`, and `corr` by reducing numerical stability. The old functions are available under the names `varSampStable`, `varPopStable`, `stddevSampStable`, `stddevPopStable`, `covarSampStable`, `covarPopStable`, `corrStable`.
+
+### Bug fixes:
+
+* Fixed data deduplication after running a `DROP` or `DETACH PARTITION` query. In the previous version, dropping a partition and inserting the same data again was not working because inserted blocks were considered duplicates.
+* Fixed a bug that could lead to incorrect interpretation of the `WHERE` clause for `CREATE MATERIALIZED VIEW` queries with `POPULATE`.
+* Fixed a bug in using the `root_path` parameter in the `zookeeper_servers` configuration.
+* Fixed unexpected results of passing the `Date` argument to `toStartOfDay`.
+* Fixed the `addMonths` and `subtractMonths` functions and the arithmetic for `INTERVAL n MONTH` in cases when the result has the previous year.
+* Added missing support for the `UUID` data type for `DISTINCT`, `JOIN`, and `uniq` aggregate functions and external dictionaries (Evgeniy Ivanov). Support for `UUID` is still incomplete.
+* Fixed `SummingMergeTree` behavior in cases when the rows summed to zero.
+* Various fixes for the `Kafka` engine (Marek Vavruša).
+* Fixed incorrect behavior of the `Join` table engine (Amos Bird).
+* Fixed incorrect allocator behavior under FreeBSD and OS X.
+* The `extractAll` function now supports empty matches.
+* Fixed an error that blocked usage of `libressl` instead of `openssl`.
+* Fixed the `CREATE TABLE AS SELECT` query from temporary tables.
+* Fixed non-atomicity of updating the replication queue. This could lead to replicas being out of sync until the server restarts.
+* Fixed possible overflow in `gcd`, `lcm`, and `modulo` (`%` operator) (Maks Skorokhod).
+* `-preprocessed` files are now created after changing `umask` (`umask` can be changed in the config).
+* Fixed a bug in the background check of parts (`MergeTreePartChecker` ) when using a custom partition key. +* Fixed parsing of tuples (values of the `Tuple` data type) in text formats. +* Improved error messages about incompatible types passed to `multiIf` , `array` and some other functions. +* Redesigned support for `Nullable` types. Fixed bugs that may lead to a server crash. Fixed almost all other bugs related to ` NULL` support: incorrect type conversions in INSERT SELECT, insufficient support for Nullable in HAVING and PREWHERE, `join_use_nulls` mode, Nullable types as arguments of `OR` operator, etc. +* Fixed various bugs related to internal semantics of data types. Examples: unnecessary summing of `Enum` type fields in `SummingMergeTree` ; alignment of `Enum` types in `Pretty` formats, etc. +* Stricter checks for allowed combinations of composite columns. +* Fixed the overflow when specifying a very large parameter for the `FixedString` data type. +* Fixed a bug in the `topK` aggregate function in a generic case. +* Added the missing check for equality of array sizes in arguments of n-ary variants of aggregate functions with an `-Array` combinator. +* Fixed a bug in `--pager` for `clickhouse-client` (author: ks1322). +* Fixed the precision of the `exp10` function. +* Fixed the behavior of the `visitParamExtract` function for better compliance with documentation. +* Fixed the crash when incorrect data types are specified. +* Fixed the behavior of `DISTINCT` in the case when all columns are constants. +* Fixed query formatting in the case of using the `tupleElement` function with a complex constant expression as the tuple element index. +* Fixed a bug in `Dictionary` tables for `range_hashed` dictionaries. +* Fixed a bug that leads to excessive rows in the result of `FULL` and ` RIGHT JOIN` (Amos Bird). +* Fixed a server crash when creating and removing temporary files in `config.d` directories during config reload. +* Fixed the ` SYSTEM DROP DNS CACHE` query: the cache was flushed but addresses of cluster nodes were not updated. +* Fixed the behavior of ` MATERIALIZED VIEW` after executing ` DETACH TABLE` for the table under the view (Marek Vavruša). + +### Build improvements: + +* The `pbuilder` tool is used for builds. The build process is almost completely independent of the build host environment. +* A single build is used for different OS versions. Packages and binaries have been made compatible with a wide range of Linux systems. +* Added the `clickhouse-test` package. It can be used to run functional tests. +* The source tarball can now be published to the repository. It can be used to reproduce the build without using GitHub. +* Added limited integration with Travis CI. Due to limits on build time in Travis, only the debug build is tested and a limited subset of tests are run. +* Added support for `Cap'n'Proto` in the default build. +* Changed the format of documentation sources from `Restricted Text` to `Markdown`. +* Added support for `systemd` (Vladimir Smirnov). It is disabled by default due to incompatibility with some OS images and can be enabled manually. +* For dynamic code generation, `clang` and `lld` are embedded into the `clickhouse` binary. They can also be invoked as ` clickhouse clang` and ` clickhouse lld` . +* Removed usage of GNU extensions from the code. Enabled the `-Wextra` option. When building with `clang` the default is `libc++` instead of `libstdc++`. 
+* Extracted `clickhouse_parsers` and `clickhouse_common_io` libraries to speed up builds of various tools. + +### Backward incompatible changes: + +* The format for marks in `Log` type tables that contain `Nullable` columns was changed in a backward incompatible way. If you have these tables, you should convert them to the `TinyLog` type before starting up the new server version. To do this, replace `ENGINE = Log` with `ENGINE = TinyLog` in the corresponding `.sql` file in the `metadata` directory. If your table doesn't have `Nullable` columns or if the type of your table is not `Log`, then you don't need to do anything. +* Removed the `experimental_allow_extended_storage_definition_syntax` setting. Now this feature is enabled by default. +* The `runningIncome` function was renamed to `runningDifferenceStartingWithFirstvalue` to avoid confusion. +* Removed the ` FROM ARRAY JOIN arr` syntax when ARRAY JOIN is specified directly after FROM with no table (Amos Bird). +* Removed the `BlockTabSeparated` format that was used solely for demonstration purposes. +* Changed the state format for aggregate functions `varSamp`, `varPop`, `stddevSamp`, `stddevPop`, `covarSamp`, `covarPop`, `corr`. If you have stored states of these aggregate functions in tables (using the `AggregateFunction` data type or materialized views with corresponding states), please write to clickhouse-feedback@yandex-team.com. +* In previous server versions there was an undocumented feature: if an aggregate function depends on parameters, you can still specify it without parameters in the AggregateFunction data type. Example: `AggregateFunction(quantiles, UInt64)` instead of `AggregateFunction(quantiles(0.5, 0.9), UInt64)`. This feature was lost. Although it was undocumented, we plan to support it again in future releases. +* Enum data types cannot be used in min/max aggregate functions. This ability will be returned in the next release. + +### Please note when upgrading: + +* When doing a rolling update on a cluster, at the point when some of the replicas are running the old version of ClickHouse and some are running the new version, replication is temporarily stopped and the message ` unknown parameter 'shard'` appears in the log. Replication will continue after all replicas of the cluster are updated. +* If different versions of ClickHouse are running on the cluster servers, it is possible that distributed queries using the following functions will have incorrect results: `varSamp`, `varPop`, `stddevSamp`, `stddevPop`, `covarSamp`, `covarPop`, `corr`. You should update all cluster nodes. + +## ClickHouse release 1.1.54327, 2017-12-21 + +This release contains bug fixes for the previous release 1.1.54318: + +* Fixed bug with possible race condition in replication that could lead to data loss. This issue affects versions 1.1.54310 and 1.1.54318. If you use one of these versions with Replicated tables, the update is strongly recommended. This issue shows in logs in Warning messages like ` Part ... from own log doesn't exist.` The issue is relevant even if you don't see these messages in logs. 
+ +## ClickHouse release 1.1.54318, 2017-11-30 + +This release contains bug fixes for the previous release 1.1.54310: + +* Fixed incorrect row deletions during merges in the SummingMergeTree engine +* Fixed a memory leak in unreplicated MergeTree engines +* Fixed performance degradation with frequent inserts in MergeTree engines +* Fixed an issue that was causing the replication queue to stop running +* Fixed rotation and archiving of server logs + +## ClickHouse release 1.1.54310, 2017-11-01 + +### New features: + +* Custom partitioning key for the MergeTree family of table engines. +* [ Kafka](https://clickhouse.yandex/docs/en/single/index.html#document-table_engines/kafka) table engine. +* Added support for loading [CatBoost](https://catboost.yandex/) models and applying them to data stored in ClickHouse. +* Added support for time zones with non-integer offsets from UTC. +* Added support for arithmetic operations with time intervals. +* The range of values for the Date and DateTime types is extended to the year 2105. +* Added the ` CREATE MATERIALIZED VIEW x TO y` query (specifies an existing table for storing the data of a materialized view). +* Added the `ATTACH TABLE` query without arguments. +* The processing logic for Nested columns with names ending in -Map in a SummingMergeTree table was extracted to the sumMap aggregate function. You can now specify such columns explicitly. +* Max size of the IP trie dictionary is increased to 128M entries. +* Added the getSizeOfEnumType function. +* Added the sumWithOverflow aggregate function. +* Added support for the Cap'n Proto input format. +* You can now customize compression level when using the zstd algorithm. + +### Backward incompatible changes: + +* Creation of temporary tables with an engine other than Memory is not allowed. +* Explicit creation of tables with the View or MaterializedView engine is not allowed. +* During table creation, a new check verifies that the sampling key expression is included in the primary key. + +### Bug fixes: + +* Fixed hangups when synchronously inserting into a Distributed table. +* Fixed nonatomic adding and removing of parts in Replicated tables. +* Data inserted into a materialized view is not subjected to unnecessary deduplication. +* Executing a query to a Distributed table for which the local replica is lagging and remote replicas are unavailable does not result in an error anymore. +* Users don't need access permissions to the `default` database to create temporary tables anymore. +* Fixed crashing when specifying the Array type without arguments. +* Fixed hangups when the disk volume containing server logs is full. +* Fixed an overflow in the toRelativeWeekNum function for the first week of the Unix epoch. + +### Build improvements: + +* Several third-party libraries (notably Poco) were updated and converted to git submodules. + +## ClickHouse release 1.1.54304, 2017-10-19 + +### New features: + +* TLS support in the native protocol (to enable, set `tcp_ssl_port` in `config.xml` ). + +### Bug fixes: + +* `ALTER` for replicated tables now tries to start running as soon as possible. +* Fixed crashing when reading data with the setting `preferred_block_size_bytes=0.` +* Fixed crashes of `clickhouse-client` when pressing ` Page Down` +* Correct interpretation of certain complex queries with `GLOBAL IN` and `UNION ALL` +* `FREEZE PARTITION` always works atomically now. +* Empty POST requests now return a response with code 411. 
+* Fixed interpretation errors for expressions like `CAST(1 AS Nullable(UInt8)).` +* Fixed an error when reading `Array(Nullable(String))` columns from `MergeTree` tables. +* Fixed crashing when parsing queries like `SELECT dummy AS dummy, dummy AS b` +* Users are updated correctly with invalid `users.xml` +* Correct handling when an executable dictionary returns a non-zero response code. + +## ClickHouse release 1.1.54292, 2017-09-20 + +### New features: + +* Added the `pointInPolygon` function for working with coordinates on a coordinate plane. +* Added the `sumMap` aggregate function for calculating the sum of arrays, similar to `SummingMergeTree`. +* Added the `trunc` function. Improved performance of the rounding functions (`round`, `floor`, `ceil`, `roundToExp2`) and corrected the logic of how they work. Changed the logic of the `roundToExp2` function for fractions and negative numbers. +* The ClickHouse executable file is now less dependent on the libc version. The same ClickHouse executable file can run on a wide variety of Linux systems. There is still a dependency when using compiled queries (with the setting ` compile = 1` , which is not used by default). +* Reduced the time needed for dynamic compilation of queries. + +### Bug fixes: + +* Fixed an error that sometimes produced ` part ... intersects previous part` messages and weakened replica consistency. +* Fixed an error that caused the server to lock up if ZooKeeper was unavailable during shutdown. +* Removed excessive logging when restoring replicas. +* Fixed an error in the UNION ALL implementation. +* Fixed an error in the concat function that occurred if the first column in a block has the Array type. +* Progress is now displayed correctly in the system.merges table. + +## ClickHouse release 1.1.54289, 2017-09-13 + +### New features: + +* `SYSTEM` queries for server administration: `SYSTEM RELOAD DICTIONARY`, `SYSTEM RELOAD DICTIONARIES`, `SYSTEM DROP DNS CACHE`, `SYSTEM SHUTDOWN`, `SYSTEM KILL`. +* Added functions for working with arrays: `concat`, `arraySlice`, `arrayPushBack`, `arrayPushFront`, `arrayPopBack`, `arrayPopFront`. +* Added `root` and `identity` parameters for the ZooKeeper configuration. This allows you to isolate individual users on the same ZooKeeper cluster. +* Added aggregate functions `groupBitAnd`, `groupBitOr`, and `groupBitXor` (for compatibility, they are also available under the names `BIT_AND`, `BIT_OR`, and `BIT_XOR`). +* External dictionaries can be loaded from MySQL by specifying a socket in the filesystem. +* External dictionaries can be loaded from MySQL over SSL (`ssl_cert`, `ssl_key`, `ssl_ca` parameters). +* Added the `max_network_bandwidth_for_user` setting to restrict the overall bandwidth use for queries per user. +* Support for `DROP TABLE` for temporary tables. +* Support for reading `DateTime` values in Unix timestamp format from the `CSV` and `JSONEachRow` formats. +* Lagging replicas in distributed queries are now excluded by default (the default threshold is 5 minutes). +* FIFO locking is used during ALTER: an ALTER query isn't blocked indefinitely for continuously running queries. +* Option to set `umask` in the config file. +* Improved performance for queries with `DISTINCT` . + +### Bug fixes: + +* Improved the process for deleting old nodes in ZooKeeper. Previously, old nodes sometimes didn't get deleted if there were very frequent inserts, which caused the server to be slow to shut down, among other things. 
+* Fixed randomization when choosing hosts for the connection to ZooKeeper.
+* Fixed the exclusion of lagging replicas in distributed queries if the replica is localhost.
+* Fixed an error where a data part in a `ReplicatedMergeTree` table could be broken after running `ALTER MODIFY` on an element in a `Nested` structure.
+* Fixed an error that could cause SELECT queries to "hang".
+* Improvements to distributed DDL queries.
+* Fixed the query `CREATE TABLE ... AS <materialized view>`.
+* Resolved the deadlock in the `ALTER ... CLEAR COLUMN IN PARTITION` query for `Buffer` tables.
+* Fixed the invalid default value for `Enum`s (0 instead of the minimum) when using the `JSONEachRow` and `TSKV` formats.
+* Resolved the appearance of zombie processes when using a dictionary with an `executable` source.
+* Fixed segfault for the HEAD query.
+
+### Improved workflow for developing and assembling ClickHouse:
+
+* You can use `pbuilder` to build ClickHouse.
+* You can use `libc++` instead of `libstdc++` for builds on Linux.
+* Added instructions for using static code analysis tools: `Coverage`, `clang-tidy`, `cppcheck`.
+
+### Please note when upgrading:
+
+* There is now a higher default value for the MergeTree setting `max_bytes_to_merge_at_max_space_in_pool` (the maximum total size of data parts to merge, in bytes): it has increased from 100 GiB to 150 GiB. This might result in large merges running after the server upgrade, which could cause an increased load on the disk subsystem. If the free space available on the server is less than twice the total amount of the merges that are running, this will cause all other merges to stop running, including merges of small data parts. As a result, INSERT requests will fail with the message "Merges are processing significantly slower than inserts." Use the `SELECT * FROM system.merges` request to monitor the situation. You can also check the `DiskSpaceReservedForMerge` metric in the `system.metrics` table, or in Graphite. You don't need to do anything to fix this, since the issue will resolve itself once the large merges finish. If you find this unacceptable, you can restore the previous value for the `max_bytes_to_merge_at_max_space_in_pool` setting. To do this, go to the `<merge_tree>` section in config.xml, set `<max_bytes_to_merge_at_max_space_in_pool>107374182400</max_bytes_to_merge_at_max_space_in_pool>`, and restart the server.
+
+## ClickHouse release 1.1.54284, 2017-08-29
+
+* This is a bugfix release for the previous 1.1.54282 release. It fixes leaks in the parts directory in ZooKeeper.
+
+## ClickHouse release 1.1.54282, 2017-08-23
+
+This release contains bug fixes for the previous release 1.1.54276:
+
+* Fixed `DB::Exception: Assertion violation: !_path.empty()` when inserting into a Distributed table.
+* Fixed parsing when inserting in RowBinary format if input data starts with ';'.
+* Fixed errors during runtime compilation of certain aggregate functions (e.g. `groupArray()`).
+
+## ClickHouse release 1.1.54276, 2017-08-16
+
+### New features:
+
+* Added an optional WITH section for a SELECT query. Example query: `WITH 1+1 AS a SELECT a, a*a`.
+* INSERT can be performed synchronously in a Distributed table: OK is returned only after all the data is saved on all the shards. This is activated by the setting insert_distributed_sync=1.
+* Added the UUID data type for working with 16-byte identifiers.
+* Added aliases of CHAR, FLOAT and other types for compatibility with Tableau.
+* Added the functions `toYYYYMM`, `toYYYYMMDD`, and `toYYYYMMDDhhmmss` for converting time into numbers.
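+
+  A quick, illustrative example of these functions (the output values are hypothetical and depend on the current date and time):
+
+  ```sql
+  -- Each function packs the corresponding date/time components into a single number.
+  SELECT
+      toYYYYMM(now())         AS yyyymm,          -- e.g. 201708
+      toYYYYMMDD(now())       AS yyyymmdd,        -- e.g. 20170816
+      toYYYYMMDDhhmmss(now()) AS yyyymmddhhmmss   -- e.g. 20170816123456
+  ```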
+* You can use IP addresses (together with the hostname) to identify servers for clustered DDL queries.
+* Added support for non-constant arguments and negative offsets in the function `substring(str, pos, len)`.
+* Added the max_size parameter for the `groupArray(max_size)(column)` aggregate function, and optimized its performance.
+
+### Main changes:
+
+* Security improvements: all server files are created with 0640 permissions (can be changed via a config parameter).
+* Improved error messages for queries with invalid syntax.
+* Significantly reduced memory consumption and improved performance when merging large sections of MergeTree data.
+* Significantly increased the performance of data merges for the ReplacingMergeTree engine.
+* Improved performance for asynchronous inserts from a Distributed table by combining multiple source inserts. To enable this functionality, use the setting distributed_directory_monitor_batch_inserts=1.
+
+### Backward incompatible changes:
+
+* Changed the binary format of aggregate states of `groupArray(array_column)` functions for arrays.
+
+### Complete list of changes:
+
+* Added the `output_format_json_quote_denormals` setting, which enables outputting nan and inf values in JSON format.
+* Optimized stream allocation when reading from a Distributed table.
+* Settings can be configured in readonly mode if the value doesn't change.
+* Added the ability to retrieve non-integer granules of the MergeTree engine in order to meet restrictions on the block size specified in the preferred_block_size_bytes setting. The purpose is to reduce the consumption of RAM and increase cache locality when processing queries from tables with large columns.
+* Efficient use of indexes that contain expressions like `toStartOfHour(x)` for conditions like `toStartOfHour(x) op constexpr`.
+* Added new settings for MergeTree engines (the merge_tree section in config.xml):
+  - replicated_deduplication_window_seconds sets the number of seconds allowed for deduplicating inserts in Replicated tables.
+  - cleanup_delay_period sets how often to start cleanup to remove outdated data.
+  - replicated_can_become_leader can prevent a replica from becoming the leader (and assigning merges).
+* Accelerated cleanup to remove outdated data from ZooKeeper.
+* Multiple improvements and fixes for clustered DDL queries. Of particular interest is the new setting distributed_ddl_task_timeout, which limits the time to wait for a response from the servers in the cluster.
+* Improved display of stack traces in the server logs.
+* Added the "none" value for the compression method.
+* You can use multiple dictionaries_config sections in config.xml.
+* It is possible to connect to MySQL through a socket in the file system.
+* The system.parts table has a new column with information about the size of marks, in bytes.
+
+### Bug fixes:
+
+* Distributed tables using a Merge table now work correctly for a SELECT query with a condition on the `_table` field.
+* Fixed a rare race condition in ReplicatedMergeTree when checking data parts.
+* Fixed possible freezing on "leader election" when starting a server.
+* The max_replica_delay_for_distributed_queries setting was ignored when using a local replica of the data source. This has been fixed.
+* Fixed incorrect behavior of `ALTER TABLE CLEAR COLUMN IN PARTITION` when attempting to clean a non-existing column.
+* Fixed an exception in the multiIf function when using empty arrays or strings.
+* Fixed excessive memory allocations when deserializing Native format.
+* Fixed incorrect auto-update of Trie dictionaries.
+* Fixed an exception when running queries with a GROUP BY clause from a Merge table when using SAMPLE.
+* Fixed a crash of GROUP BY when using distributed_aggregation_memory_efficient=1.
+* Now you can specify the database.table in the right side of IN and JOIN.
+* Too many threads were used for parallel aggregation. This has been fixed.
+* Fixed how the `if` function works with FixedString arguments.
+* SELECT worked incorrectly from a Distributed table for shards with a weight of 0. This has been fixed.
+* Running `CREATE VIEW IF EXISTS` no longer causes crashes.
+* Fixed incorrect behavior when input_format_skip_unknown_fields=1 is set and there are negative numbers.
+* Fixed an infinite loop in the `dictGetHierarchy()` function if there is some invalid data in the dictionary.
+* Fixed `Syntax error: unexpected (...)` errors when running distributed queries with subqueries in an IN or JOIN clause and Merge tables.
+* Fixed an incorrect interpretation of a SELECT query from Dictionary tables.
+* Fixed the "Cannot mremap" error when using arrays in IN and JOIN clauses with more than 2 billion elements.
+* Fixed the failover for dictionaries with MySQL as the source.
+
+### Improved workflow for developing and assembling ClickHouse:
+
+* Builds can be assembled in Arcadia.
+* You can use gcc 7 to compile ClickHouse.
+* Parallel builds using ccache+distcc are faster now.
+
+## ClickHouse release 1.1.54245, 2017-07-04
+
+### New features:
+
+* Distributed DDL (for example, `CREATE TABLE ON CLUSTER`); an illustrative sketch follows these release notes.
+* The replicated request `ALTER TABLE CLEAR COLUMN IN PARTITION`.
+* The engine for Dictionary tables (access to dictionary data in the form of a table).
+* Dictionary database engine (this type of database automatically has Dictionary tables available for all the connected external dictionaries).
+* You can check for updates to the dictionary by sending a request to the source.
+* Qualified column names.
+* Quoting identifiers using double quotation marks.
+* Sessions in the HTTP interface.
+* The OPTIMIZE query for a Replicated table can run not only on the leader.
+
+### Backward incompatible changes:
+
+* Removed SET GLOBAL.
+
+### Minor changes:
+
+* Now after an alert is triggered, the log prints the full stack trace.
+* Relaxed the verification of the number of damaged/extra data parts at startup (there were too many false positives).
+
+### Bug fixes:
+
+* Fixed a bad connection "sticking" when inserting into a Distributed table.
+* GLOBAL IN now works for a query from a Merge table that looks at a Distributed table.
+* The incorrect number of cores was detected on a Google Compute Engine virtual machine. This has been fixed.
+* Changes in how an executable source of cached external dictionaries works.
+* Fixed the comparison of strings containing null characters.
+* Fixed the comparison of Float32 primary key fields with constants.
+* Previously, an incorrect estimate of the size of a field could lead to overly large allocations.
+* Fixed a crash when querying a Nullable column added to a table using ALTER.
+* Fixed a crash when sorting by a Nullable column, if the number of rows is less than LIMIT.
+* Fixed an ORDER BY subquery consisting of only constant values.
+* Previously, a Replicated table could remain in an invalid state after a failed DROP TABLE.
+* Aliases for scalar subqueries with empty results are no longer lost.
+* Now a query that used compilation does not fail with an error if the .so file gets damaged.
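+
+An illustrative sketch of the distributed DDL feature referenced in the 1.1.54245 notes above. The cluster name `example_cluster`, the database, the table, and its columns are hypothetical: the cluster would have to be defined in the server's `remote_servers` configuration, and the engine arguments use the old-style `MergeTree` syntax of that era.
+
+```sql
+-- Creates the table on every host of the configured cluster in a single statement.
+CREATE TABLE default.hits_local ON CLUSTER example_cluster
+(
+    event_date Date,
+    user_id UInt64,
+    url String
+) ENGINE = MergeTree(event_date, (event_date, user_id), 8192)
+```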
diff --git a/CHANGELOG_RU.md b/CHANGELOG_RU.md index a6b9ca9298..216d80f100 100644 --- a/CHANGELOG_RU.md +++ b/CHANGELOG_RU.md @@ -1,3 +1,16 @@ +## ClickHouse release 18.14.15, 2018-11-21 + +### Исправления ошибок: +* При чтении столбцов типа `Array(String)`, размер требуемого куска памяти оценивался слишком большим, что приводило к исключению "Memory limit exceeded" при выполнении запроса. Ошибка появилась в версии 18.12.13. [#3589](https://github.com/yandex/ClickHouse/issues/3589) + +## ClickHouse release 18.14.14, 2018-11-20 + +### Исправления ошибок: +* Исправлена работа запросов `ON CLUSTER` в случае, когда в конфигурации кластера включено шифрование (флаг ``). [#3599](https://github.com/yandex/ClickHouse/pull/3599) + +### Улучшения процесса сборки ClickHouse: +* Испрпавлены проблемы сборки (llvm-7 из системы, macos) [#3582](https://github.com/yandex/ClickHouse/pull/3582) + ## ClickHouse release 18.14.13, 2018-11-08 ### Исправления ошибок: diff --git a/CMakeLists.txt b/CMakeLists.txt index 094fe8c7b0..1a8bd57e7b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -262,6 +262,7 @@ include (cmake/find_llvm.cmake) include (cmake/find_cpuid.cmake) include (cmake/find_hdfs3.cmake) include (cmake/find_consistent-hashing.cmake) +include (cmake/find_base64.cmake) if (ENABLE_TESTS) include (cmake/find_gtest.cmake) endif () diff --git a/README.md b/README.md index a488c55643..5ffefd793a 100644 --- a/README.md +++ b/README.md @@ -2,11 +2,10 @@ ClickHouse is an open-source column-oriented database management system that allows generating analytical data reports in real time. -🎤🥂 **ClickHouse Meetup in [Amsterdam on November 15](https://events.yandex.com/events/meetings/15-11-2018/)** 🍰🔥🐻 - ## Useful Links * [Official website](https://clickhouse.yandex/) has quick high-level overview of ClickHouse on main page. * [Tutorial](https://clickhouse.yandex/tutorial.html) shows how to set up and query small ClickHouse cluster. * [Documentation](https://clickhouse.yandex/docs/en/) provides more in-depth information. +* [Blog](https://clickhouse.yandex/blog/en/) contains various ClickHouse-related articles, as well as announces and reports about events. * [Contacts](https://clickhouse.yandex/#contacts) can help to get your questions answered if there are any. diff --git a/cmake/find_base64.cmake b/cmake/find_base64.cmake new file mode 100644 index 0000000000..ad71d11c1b --- /dev/null +++ b/cmake/find_base64.cmake @@ -0,0 +1,12 @@ +option (ENABLE_BASE64 "Enable base64" ON) + +if (ENABLE_BASE64) + if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/base64") + message (WARNING "submodule contrib/base64 is missing. 
to fix try run: \n git submodule update --init --recursive") + else() + set (BASE64_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/base64/include) + set (BASE64_LIBRARY base64) + set (USE_BASE64 1) + endif() +endif () + diff --git a/cmake/find_odbc.cmake b/cmake/find_odbc.cmake index d89e3b532d..32a410c6f1 100644 --- a/cmake/find_odbc.cmake +++ b/cmake/find_odbc.cmake @@ -71,10 +71,10 @@ if (ENABLE_ODBC) ) # MinGW find usually fails - if(MINGW) + if (MINGW) set(ODBC_INCLUDE_DIRECTORIES ".") set(ODBC_LIBRARIES odbc32) - endif() + endif () include(FindPackageHandleStandardArgs) find_package_handle_standard_args(ODBC @@ -82,6 +82,10 @@ if (ENABLE_ODBC) ODBC_INCLUDE_DIRECTORIES ODBC_LIBRARIES) + if (USE_STATIC_LIBRARIES) + list(APPEND ODBC_LIBRARIES ${LTDL_LIBRARY}) + endif () + mark_as_advanced(ODBC_FOUND ODBC_LIBRARIES ODBC_INCLUDE_DIRECTORIES) endif () endif () diff --git a/cmake/find_poco.cmake b/cmake/find_poco.cmake index d8468e5306..012f269d48 100644 --- a/cmake/find_poco.cmake +++ b/cmake/find_poco.cmake @@ -93,8 +93,8 @@ elseif (NOT MISSING_INTERNAL_POCO_LIBRARY) endif () if (OPENSSL_FOUND AND (NOT DEFINED ENABLE_POCO_NETSSL OR ENABLE_POCO_NETSSL)) - set (Poco_NetSSL_LIBRARY PocoNetSSL) - set (Poco_Crypto_LIBRARY PocoCrypto) + set (Poco_NetSSL_LIBRARY PocoNetSSL ${OPENSSL_LIBRARIES}) + set (Poco_Crypto_LIBRARY PocoCrypto ${OPENSSL_LIBRARIES}) endif () if (USE_STATIC_LIBRARIES AND USE_INTERNAL_ZLIB_LIBRARY) diff --git a/cmake/lib_name.cmake b/cmake/lib_name.cmake index b49276fc27..5c919b263e 100644 --- a/cmake/lib_name.cmake +++ b/cmake/lib_name.cmake @@ -1,5 +1,4 @@ set(DIVIDE_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/libdivide) -set(CITYHASH_CONTRIB_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/cityhash102/include) set(COMMON_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/libs/libcommon/include ${ClickHouse_BINARY_DIR}/libs/libcommon/include) set(DBMS_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/dbms/src ${ClickHouse_BINARY_DIR}/dbms/src) set(DOUBLE_CONVERSION_CONTRIB_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/double-conversion) diff --git a/cmake/print_include_directories.cmake b/cmake/print_include_directories.cmake index 41c4773cfa..c4c5d00c54 100644 --- a/cmake/print_include_directories.cmake +++ b/cmake/print_include_directories.cmake @@ -10,6 +10,9 @@ list(APPEND dirs ${dirs1}) get_property (dirs1 TARGET common PROPERTY INCLUDE_DIRECTORIES) list(APPEND dirs ${dirs1}) +get_property (dirs1 TARGET cityhash PROPERTY INCLUDE_DIRECTORIES) +list(APPEND dirs ${dirs1}) + if (USE_INTERNAL_BOOST_LIBRARY) get_property (dirs1 TARGET ${Boost_PROGRAM_OPTIONS_LIBRARY} PROPERTY INCLUDE_DIRECTORIES) list(APPEND dirs ${dirs1}) diff --git a/cmake/test_cpu.cmake b/cmake/test_cpu.cmake index 6894c58703..c360de5b96 100644 --- a/cmake/test_cpu.cmake +++ b/cmake/test_cpu.cmake @@ -45,6 +45,38 @@ if (HAVE_SSE42) set (COMPILER_FLAGS "${COMPILER_FLAGS} ${TEST_FLAG}") endif () +set (TEST_FLAG "-mssse3") +set (CMAKE_REQUIRED_FLAGS "${TEST_FLAG} -O0") +check_cxx_source_compiles(" + #include + int main() { + __m64 a = _mm_abs_pi8(__m64()); + (void)a; + return 0; + } +" HAVE_SSSE3) + +set (TEST_FLAG "-mavx") +set (CMAKE_REQUIRED_FLAGS "${TEST_FLAG} -O0") +check_cxx_source_compiles(" + #include + int main() { + auto a = _mm256_insert_epi8(__m256i(), 0, 0); + (void)a; + return 0; + } +" HAVE_AVX) + +set (TEST_FLAG "-mavx2") +set (CMAKE_REQUIRED_FLAGS "${TEST_FLAG} -O0") +check_cxx_source_compiles(" + #include + int main() { + auto a = _mm256_add_epi16(__m256i(), __m256i()); + (void)a; + return 0; + } +" HAVE_AVX2) # gcc -dM -E 
-mpopcnt - < /dev/null | sort > gcc-dump-popcnt #define __POPCNT__ 1 @@ -65,5 +97,3 @@ if (HAVE_POPCNT AND NOT ARCH_AARCH64) endif () cmake_pop_check_state () - -# TODO: add here sse3 test if you want use it diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 20c00d3f54..b7085f992c 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -200,3 +200,7 @@ if (USE_INTERNAL_HDFS3_LIBRARY) endif () add_subdirectory(libhdfs3-cmake) endif () + +if (USE_BASE64) + add_subdirectory (base64-cmake) +endif() diff --git a/contrib/base64 b/contrib/base64 new file mode 160000 index 0000000000..a27c565d1b --- /dev/null +++ b/contrib/base64 @@ -0,0 +1 @@ +Subproject commit a27c565d1b6c676beaf297fe503c4518185666f7 diff --git a/contrib/base64-cmake/.gitignore b/contrib/base64-cmake/.gitignore new file mode 100644 index 0000000000..0e56cf2f8c --- /dev/null +++ b/contrib/base64-cmake/.gitignore @@ -0,0 +1 @@ +config.h diff --git a/contrib/base64-cmake/CMakeLists.txt b/contrib/base64-cmake/CMakeLists.txt new file mode 100644 index 0000000000..09abb3b02b --- /dev/null +++ b/contrib/base64-cmake/CMakeLists.txt @@ -0,0 +1,52 @@ +SET(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/base64) + +set(base64_compile_instructions "") +LIST(LENGTH base64_compile_instructions 0) +macro(cast_to_bool var instruction) + if (HAVE_${var}) + set(base64_${var} 1) + set(base64_${var}_opt ${instruction}) + else() + set(base64_${var} 0) + endif() +endmacro() + +cast_to_bool(SSSE3 "-mssse3") +cast_to_bool(SSE41 "-msse4.1") +cast_to_bool(SSE42 "-msse4.2") +cast_to_bool(AVX "-mavx") +cast_to_bool(AVX2 "-mavx2") + +# write config.h file, to include it in application +file(READ config-header.tpl header) +file(WRITE config.h ${header}) +file(APPEND config.h "#define HAVE_SSSE3 ${base64_SSSE3}\n") +file(APPEND config.h "#define HAVE_SSE41 ${base64_SSE41}\n") +file(APPEND config.h "#define HAVE_SSE42 ${base64_SSE42}\n") +file(APPEND config.h "#define HAVE_AVX ${base64_AVX}\n") +file(APPEND config.h "#define HAVE_AVX2 ${base64_AVX2}\n") + +set(HAVE_FAST_UNALIGNED_ACCESS 0) +if (${base64_SSSE3} OR ${base64_SSE41} OR ${base64_SSE42} OR ${base64_AVX} OR ${base64_AVX2}) + set(HAVE_FAST_UNALIGNED_ACCESS 1) +endif () + +file(APPEND config.h "#define HAVE_FAST_UNALIGNED_ACCESS " ${HAVE_FAST_UNALIGNED_ACCESS} "\n") + +add_library(base64 ${LINK_MODE} + ${LIBRARY_DIR}/lib/lib.c + ${LIBRARY_DIR}/lib/codec_choose.c + ${LIBRARY_DIR}/lib/arch/avx/codec.c + ${LIBRARY_DIR}/lib/arch/avx2/codec.c + ${LIBRARY_DIR}/lib/arch/generic/codec.c + ${LIBRARY_DIR}/lib/arch/neon32/codec.c + ${LIBRARY_DIR}/lib/arch/neon64/codec.c + ${LIBRARY_DIR}/lib/arch/sse41/codec.c + ${LIBRARY_DIR}/lib/arch/sse42/codec.c + ${LIBRARY_DIR}/lib/arch/ssse3/codec.c + + ${LIBRARY_DIR}/lib/codecs.h + config.h) + +target_compile_options(base64 PRIVATE ${base64_SSSE3_opt} ${base64_SSE41_opt} ${base64_SSE42_opt} ${base64_AVX_opt} ${base64_AVX2_opt}) +target_include_directories(base64 PRIVATE ${LIBRARY_DIR}/include .) 
\ No newline at end of file diff --git a/contrib/base64-cmake/config-header.tpl b/contrib/base64-cmake/config-header.tpl new file mode 100644 index 0000000000..c978ca0c08 --- /dev/null +++ b/contrib/base64-cmake/config-header.tpl @@ -0,0 +1,2 @@ +#define HAVE_NEON32 0 +#define HAVE_NEON64 0 diff --git a/contrib/cityhash102/CMakeLists.txt b/contrib/cityhash102/CMakeLists.txt index eafa6f2025..c3f53a8f87 100644 --- a/contrib/cityhash102/CMakeLists.txt +++ b/contrib/cityhash102/CMakeLists.txt @@ -1,9 +1,8 @@ add_library(cityhash src/city.cc - include/citycrc.h include/city.h src/config.h) -target_include_directories (cityhash BEFORE PUBLIC include) -target_include_directories (cityhash PRIVATE src) +target_include_directories(cityhash BEFORE PUBLIC include) +target_include_directories(cityhash PRIVATE src) diff --git a/contrib/poco b/contrib/poco index 566162b324..20c1d87777 160000 --- a/contrib/poco +++ b/contrib/poco @@ -1 +1 @@ -Subproject commit 566162b324e0b73eb360a1cd38077c9287cc1106 +Subproject commit 20c1d877773b6a672f1bbfe3290dfea42a117ed5 diff --git a/contrib/ssl b/contrib/ssl index 919f6f1331..dbbbcdbbd1 160000 --- a/contrib/ssl +++ b/contrib/ssl @@ -1 +1 @@ -Subproject commit 919f6f1331d500bfdd26f8bbbf88e92c0119879b +Subproject commit dbbbcdbbd17785566f8f9c107b714f9e213d7293 diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index d2dac6f7f6..c3f54a4bee 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -119,7 +119,7 @@ endif () if (USE_EMBEDDED_COMPILER) llvm_libs_all(REQUIRED_LLVM_LIBRARIES) - target_link_libraries (dbms ${REQUIRED_LLVM_LIBRARIES}) + target_link_libraries (dbms PRIVATE ${REQUIRED_LLVM_LIBRARIES}) target_include_directories (dbms SYSTEM BEFORE PUBLIC ${LLVM_INCLUDE_DIRS}) endif () @@ -150,34 +150,49 @@ if (NOT ARCH_ARM AND CPUID_LIBRARY) endif() target_link_libraries (clickhouse_common_io + PUBLIC common + PRIVATE string_utils widechar_width ${LINK_LIBRARIES_ONLY_ON_X86_64} ${LZ4_LIBRARY} ${ZSTD_LIBRARY} ${DOUBLE_CONVERSION_LIBRARIES} + pocoext + PUBLIC ${Poco_Net_LIBRARY} ${Poco_Util_LIBRARY} ${Poco_Foundation_LIBRARY} + ${RE2_LIBRARY} + ${RE2_ST_LIBRARY} + ${CITYHASH_LIBRARIES} + PRIVATE ${ZLIB_LIBRARIES} ${EXECINFO_LIBRARY} ${ELF_LIBRARY} + PUBLIC ${Boost_SYSTEM_LIBRARY} + PRIVATE apple_rt ${CMAKE_DL_LIBS} ${HDFS3_LIBRARY} ) target_link_libraries (dbms + PRIVATE clickhouse_parsers clickhouse_common_config + PUBLIC clickhouse_common_io + pocoext + PUBLIC ${MYSQLXX_LIBRARY} - ${RE2_LIBRARY} - ${RE2_ST_LIBRARY} + PRIVATE ${BTRIE_LIBRARIES} ${Boost_PROGRAM_OPTIONS_LIBRARY} + PUBLIC + ${Boost_SYSTEM_LIBRARY} ) if (NOT USE_INTERNAL_RE2_LIBRARY) @@ -194,8 +209,8 @@ if (Poco_SQL_FOUND AND NOT USE_INTERNAL_POCO_LIBRARY) endif() if (USE_POCO_SQLODBC) - target_link_libraries (clickhouse_common_io ${Poco_SQL_LIBRARY}) - target_link_libraries (dbms ${Poco_SQLODBC_LIBRARY} ${Poco_SQL_LIBRARY}) + target_link_libraries (clickhouse_common_io PRIVATE ${Poco_SQL_LIBRARY}) + target_link_libraries (dbms PRIVATE ${Poco_SQLODBC_LIBRARY} ${Poco_SQL_LIBRARY}) if (NOT USE_INTERNAL_POCO_LIBRARY) target_include_directories (clickhouse_common_io SYSTEM PRIVATE ${ODBC_INCLUDE_DIRECTORIES} ${Poco_SQL_INCLUDE_DIR}) target_include_directories (dbms SYSTEM PRIVATE ${ODBC_INCLUDE_DIRECTORIES} ${Poco_SQLODBC_INCLUDE_DIR} PUBLIC ${Poco_SQL_INCLUDE_DIR}) @@ -209,48 +224,44 @@ if (Poco_Data_FOUND) endif() if (USE_POCO_DATAODBC) - target_link_libraries (clickhouse_common_io ${Poco_Data_LIBRARY}) - target_link_libraries (dbms ${Poco_DataODBC_LIBRARY}) + target_link_libraries 
(clickhouse_common_io PRIVATE ${Poco_Data_LIBRARY}) + target_link_libraries (dbms PRIVATE ${Poco_DataODBC_LIBRARY}) if (NOT USE_INTERNAL_POCO_LIBRARY) target_include_directories (dbms SYSTEM PRIVATE ${ODBC_INCLUDE_DIRECTORIES} ${Poco_DataODBC_INCLUDE_DIR}) endif() endif() if (USE_POCO_MONGODB) - target_link_libraries (dbms ${Poco_MongoDB_LIBRARY}) + target_link_libraries (dbms PRIVATE ${Poco_MongoDB_LIBRARY}) endif() if (USE_POCO_NETSSL) - target_link_libraries (clickhouse_common_io ${Poco_NetSSL_LIBRARY} ${Poco_Crypto_LIBRARY}) - target_link_libraries (dbms ${Poco_NetSSL_LIBRARY} ${Poco_Crypto_LIBRARY}) + target_link_libraries (clickhouse_common_io PRIVATE ${Poco_NetSSL_LIBRARY} ${Poco_Crypto_LIBRARY}) + target_link_libraries (dbms PRIVATE ${Poco_NetSSL_LIBRARY} ${Poco_Crypto_LIBRARY}) endif() -target_link_libraries (dbms ${Poco_Foundation_LIBRARY}) +target_link_libraries (dbms PRIVATE ${Poco_Foundation_LIBRARY}) if (USE_ICU) - target_link_libraries (dbms ${ICU_LIBS}) + target_link_libraries (dbms PRIVATE ${ICU_LIBS}) target_include_directories (dbms SYSTEM PRIVATE ${ICU_INCLUDE_DIR}) endif () if (USE_CAPNP) - target_link_libraries (dbms ${CAPNP_LIBRARY}) + target_link_libraries (dbms PRIVATE ${CAPNP_LIBRARY}) if (NOT USE_INTERNAL_CAPNP_LIBRARY) target_include_directories (dbms SYSTEM BEFORE PRIVATE ${CAPNP_INCLUDE_DIR}) endif () endif () if (USE_RDKAFKA) - target_link_libraries (dbms ${RDKAFKA_LIBRARY}) + target_link_libraries (dbms PRIVATE ${RDKAFKA_LIBRARY}) if (NOT USE_INTERNAL_RDKAFKA_LIBRARY) target_include_directories (dbms SYSTEM BEFORE PRIVATE ${RDKAFKA_INCLUDE_DIR}) endif () endif () -target_link_libraries(dbms ${OPENSSL_CRYPTO_LIBRARY}) - -target_link_libraries (dbms - Threads::Threads -) +target_link_libraries(dbms PRIVATE ${OPENSSL_CRYPTO_LIBRARY} Threads::Threads) target_include_directories (dbms SYSTEM BEFORE PRIVATE ${DIVIDE_INCLUDE_DIR}) target_include_directories (dbms SYSTEM BEFORE PRIVATE ${SPARCEHASH_INCLUDE_DIR}) @@ -286,6 +297,6 @@ if (ENABLE_TESTS) # attach all dbms gtest sources grep_gtest_sources(${ClickHouse_SOURCE_DIR}/dbms dbms_gtest_sources) add_executable(unit_tests_dbms ${dbms_gtest_sources}) - target_link_libraries(unit_tests_dbms gtest_main dbms) + target_link_libraries(unit_tests_dbms PRIVATE gtest_main dbms clickhouse_common_zookeeper) add_check(unit_tests_dbms) endif () diff --git a/dbms/cmake/version.cmake b/dbms/cmake/version.cmake index e62a8e4796..9f623279b9 100644 --- a/dbms/cmake/version.cmake +++ b/dbms/cmake/version.cmake @@ -22,3 +22,5 @@ endif () set (VERSION_NAME "${PROJECT_NAME}" CACHE STRING "") set (VERSION_FULL "${VERSION_NAME} ${VERSION_STRING}" CACHE STRING "") set (VERSION_SO "${VERSION_STRING}" CACHE STRING "") + +math (EXPR VERSION_INTEGER "${VERSION_PATCH} + ${VERSION_MINOR}*1000 + ${VERSION_MAJOR}*1000000") diff --git a/dbms/programs/CMakeLists.txt b/dbms/programs/CMakeLists.txt index 136616ca44..441b39d996 100644 --- a/dbms/programs/CMakeLists.txt +++ b/dbms/programs/CMakeLists.txt @@ -48,45 +48,45 @@ else () link_directories (${LLVM_LIBRARY_DIRS}) endif () add_executable (clickhouse main.cpp) - target_link_libraries (clickhouse clickhouse_common_io) + target_link_libraries (clickhouse PRIVATE clickhouse_common_io) target_include_directories (clickhouse BEFORE PRIVATE ${COMMON_INCLUDE_DIR}) target_include_directories (clickhouse PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) if (ENABLE_CLICKHOUSE_SERVER) - target_link_libraries (clickhouse clickhouse-server-lib) + target_link_libraries (clickhouse PRIVATE clickhouse-server-lib) endif () if 
(ENABLE_CLICKHOUSE_CLIENT) - target_link_libraries (clickhouse clickhouse-client-lib) + target_link_libraries (clickhouse PRIVATE clickhouse-client-lib) endif () if (ENABLE_CLICKHOUSE_LOCAL) - target_link_libraries (clickhouse clickhouse-local-lib) + target_link_libraries (clickhouse PRIVATE clickhouse-local-lib) endif () if (ENABLE_CLICKHOUSE_BENCHMARK) - target_link_libraries (clickhouse clickhouse-benchmark-lib) + target_link_libraries (clickhouse PRIVATE clickhouse-benchmark-lib) endif () if (ENABLE_CLICKHOUSE_PERFORMANCE) - target_link_libraries (clickhouse clickhouse-performance-test-lib) + target_link_libraries (clickhouse PRIVATE clickhouse-performance-test-lib) endif () if (ENABLE_CLICKHOUSE_COPIER) - target_link_libraries (clickhouse clickhouse-copier-lib) + target_link_libraries (clickhouse PRIVATE clickhouse-copier-lib) endif () if (ENABLE_CLICKHOUSE_EXTRACT_FROM_CONFIG) - target_link_libraries (clickhouse clickhouse-extract-from-config-lib) + target_link_libraries (clickhouse PRIVATE clickhouse-extract-from-config-lib) endif () if (ENABLE_CLICKHOUSE_COMPRESSOR) - target_link_libraries (clickhouse clickhouse-compressor-lib) + target_link_libraries (clickhouse PRIVATE clickhouse-compressor-lib) endif () if (ENABLE_CLICKHOUSE_FORMAT) - target_link_libraries (clickhouse clickhouse-format-lib) + target_link_libraries (clickhouse PRIVATE clickhouse-format-lib) endif () if (ENABLE_CLICKHOUSE_OBFUSCATOR) - target_link_libraries (clickhouse clickhouse-obfuscator-lib) + target_link_libraries (clickhouse PRIVATE clickhouse-obfuscator-lib) endif () if (USE_EMBEDDED_COMPILER) - target_link_libraries (clickhouse clickhouse-compiler-lib) + target_link_libraries (clickhouse PRIVATE clickhouse-compiler-lib) endif () if (ENABLE_CLICKHOUSE_ODBC_BRIDGE) - target_link_libraries (clickhouse clickhouse-odbc-bridge-lib) + target_link_libraries (clickhouse PRIVATE clickhouse-odbc-bridge-lib) endif() set (CLICKHOUSE_BUNDLE) diff --git a/dbms/programs/benchmark/Benchmark.cpp b/dbms/programs/benchmark/Benchmark.cpp index 161321f4d8..8931774bac 100644 --- a/dbms/programs/benchmark/Benchmark.cpp +++ b/dbms/programs/benchmark/Benchmark.cpp @@ -42,10 +42,8 @@ namespace DB namespace ErrorCodes { - extern const int POCO_EXCEPTION; - extern const int STD_EXCEPTION; - extern const int UNKNOWN_EXCEPTION; extern const int BAD_ARGUMENTS; + extern const int EMPTY_DATA_PASSED; } class Benchmark @@ -170,7 +168,7 @@ private: } if (queries.empty()) - throw Exception("Empty list of queries."); + throw Exception("Empty list of queries.", ErrorCodes::EMPTY_DATA_PASSED); std::cerr << "Loaded " << queries.size() << " queries.\n"; } diff --git a/dbms/programs/benchmark/CMakeLists.txt b/dbms/programs/benchmark/CMakeLists.txt index 85af075071..c41c46edeb 100644 --- a/dbms/programs/benchmark/CMakeLists.txt +++ b/dbms/programs/benchmark/CMakeLists.txt @@ -1,8 +1,8 @@ add_library (clickhouse-benchmark-lib ${LINK_MODE} Benchmark.cpp) -target_link_libraries (clickhouse-benchmark-lib clickhouse-client-lib clickhouse_common_io ${Boost_PROGRAM_OPTIONS_LIBRARY}) +target_link_libraries (clickhouse-benchmark-lib PRIVATE clickhouse-client-lib clickhouse_common_io ${Boost_PROGRAM_OPTIONS_LIBRARY}) target_include_directories (clickhouse-benchmark-lib SYSTEM PRIVATE ${PCG_RANDOM_INCLUDE_DIR}) if (CLICKHOUSE_SPLIT_BINARY) add_executable (clickhouse-benchmark clickhouse-benchmark.cpp) - target_link_libraries (clickhouse-benchmark clickhouse-benchmark-lib clickhouse_aggregate_functions) + target_link_libraries (clickhouse-benchmark PRIVATE 
clickhouse-benchmark-lib clickhouse_aggregate_functions) endif () diff --git a/dbms/programs/clang/CMakeLists.txt b/dbms/programs/clang/CMakeLists.txt index ca06f27546..dec21ac611 100644 --- a/dbms/programs/clang/CMakeLists.txt +++ b/dbms/programs/clang/CMakeLists.txt @@ -6,9 +6,9 @@ if (CLICKHOUSE_SPLIT_BINARY) if (USE_EMBEDDED_COMPILER) link_directories (${LLVM_LIBRARY_DIRS}) add_executable (clickhouse-clang clickhouse-clang.cpp) - target_link_libraries (clickhouse-clang clickhouse-compiler-lib) + target_link_libraries (clickhouse-clang PRIVATE clickhouse-compiler-lib) add_executable (clickhouse-lld clickhouse-lld.cpp) - target_link_libraries (clickhouse-lld clickhouse-compiler-lib) + target_link_libraries (clickhouse-lld PRIVATE clickhouse-compiler-lib) install (TARGETS clickhouse-clang clickhouse-lld RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) endif () endif () diff --git a/dbms/programs/client/CMakeLists.txt b/dbms/programs/client/CMakeLists.txt index 65353094c2..f3dd518e89 100644 --- a/dbms/programs/client/CMakeLists.txt +++ b/dbms/programs/client/CMakeLists.txt @@ -1,13 +1,12 @@ add_library (clickhouse-client-lib ${LINK_MODE} Client.cpp) -target_link_libraries (clickhouse-client-lib clickhouse_common_io clickhouse_functions clickhouse_aggregate_functions ${LINE_EDITING_LIBS} ${Boost_PROGRAM_OPTIONS_LIBRARY}) +target_link_libraries (clickhouse-client-lib PRIVATE clickhouse_common_io clickhouse_functions clickhouse_aggregate_functions ${LINE_EDITING_LIBS} ${Boost_PROGRAM_OPTIONS_LIBRARY}) if (READLINE_INCLUDE_DIR) target_include_directories (clickhouse-client-lib SYSTEM PRIVATE ${READLINE_INCLUDE_DIR}) endif () if (CLICKHOUSE_SPLIT_BINARY) add_executable (clickhouse-client clickhouse-client.cpp) - target_link_libraries (clickhouse-client clickhouse-client-lib) + target_link_libraries (clickhouse-client PRIVATE clickhouse-client-lib) endif () install (FILES clickhouse-client.xml DESTINATION ${CLICKHOUSE_ETC_DIR}/clickhouse-client COMPONENT clickhouse-client RENAME config.xml) - diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index 5a6f2b9bf2..784b172b6a 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -87,9 +87,6 @@ namespace DB namespace ErrorCodes { - extern const int POCO_EXCEPTION; - extern const int STD_EXCEPTION; - extern const int UNKNOWN_EXCEPTION; extern const int NETWORK_ERROR; extern const int NO_DATA_TO_INSERT; extern const int BAD_ARGUMENTS; diff --git a/dbms/programs/client/Suggest.h b/dbms/programs/client/Suggest.h index b93086d3b0..617e2bb520 100644 --- a/dbms/programs/client/Suggest.h +++ b/dbms/programs/client/Suggest.h @@ -184,7 +184,7 @@ public: } catch (...) { - std::cerr << "Cannot load data for command line suggestions: " << getCurrentExceptionMessage(false) << "\n"; + std::cerr << "Cannot load data for command line suggestions: " << getCurrentExceptionMessage(false, true) << "\n"; } /// Note that keyword suggestions are available even if we cannot load data from server. 
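The Benchmark.cpp and Client.cpp hunks above follow a convention applied throughout this patch: the catch-all `POCO_EXCEPTION`, `STD_EXCEPTION` and `UNKNOWN_EXCEPTION` externs are dropped, and every `throw Exception(...)` is given a specific, explicit error code (for example `ErrorCodes::EMPTY_DATA_PASSED`). The following is a minimal, self-contained sketch of that convention only; `CodedException` and the numeric values are illustrative stand-ins, not ClickHouse's actual `DB::Exception` class or its real error-code table.

```cpp
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

// Illustrative stand-in for DB::Exception: a message paired with an explicit code.
class CodedException : public std::runtime_error
{
public:
    CodedException(const std::string & message, int code_) : std::runtime_error(message), code(code_) {}
    int code;
};

// Mirrors the `extern const int ...` declarations each translation unit keeps
// in namespace ErrorCodes; the numeric values here are arbitrary.
namespace ErrorCodes
{
    const int EMPTY_DATA_PASSED = 108;
}

static void checkQueries(const std::vector<std::string> & queries)
{
    // Same shape as the Benchmark.cpp change: the thrown exception names the
    // precise condition instead of relying on a generic UNKNOWN_EXCEPTION.
    if (queries.empty())
        throw CodedException("Empty list of queries.", ErrorCodes::EMPTY_DATA_PASSED);
}

int main()
{
    try
    {
        checkQueries({});
    }
    catch (const CodedException & e)
    {
        std::cerr << "Code: " << e.code << ", message: " << e.what() << '\n';
    }
    return 0;
}
```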
diff --git a/dbms/programs/compressor/CMakeLists.txt b/dbms/programs/compressor/CMakeLists.txt index 5c9c11072c..7aa2cad570 100644 --- a/dbms/programs/compressor/CMakeLists.txt +++ b/dbms/programs/compressor/CMakeLists.txt @@ -1,8 +1,8 @@ add_library (clickhouse-compressor-lib ${LINK_MODE} Compressor.cpp) -target_link_libraries (clickhouse-compressor-lib clickhouse_common_io ${Boost_PROGRAM_OPTIONS_LIBRARY}) +target_link_libraries (clickhouse-compressor-lib PRIVATE clickhouse_common_io ${Boost_PROGRAM_OPTIONS_LIBRARY}) if (CLICKHOUSE_SPLIT_BINARY) # Also in utils add_executable (clickhouse-compressor clickhouse-compressor.cpp) - target_link_libraries (clickhouse-compressor clickhouse-compressor-lib) + target_link_libraries (clickhouse-compressor PRIVATE clickhouse-compressor-lib) endif () diff --git a/dbms/programs/copier/CMakeLists.txt b/dbms/programs/copier/CMakeLists.txt index e8583dba44..ed3e55208a 100644 --- a/dbms/programs/copier/CMakeLists.txt +++ b/dbms/programs/copier/CMakeLists.txt @@ -1,5 +1,5 @@ add_library (clickhouse-copier-lib ${LINK_MODE} ClusterCopier.cpp) -target_link_libraries (clickhouse-copier-lib clickhouse-server-lib clickhouse_functions clickhouse_aggregate_functions) +target_link_libraries (clickhouse-copier-lib PRIVATE clickhouse-server-lib clickhouse_functions clickhouse_aggregate_functions daemon) if (CLICKHOUSE_SPLIT_BINARY) add_executable (clickhouse-copier clickhouse-copier.cpp) diff --git a/dbms/programs/extract-from-config/CMakeLists.txt b/dbms/programs/extract-from-config/CMakeLists.txt index c31b0e8cec..6225364936 100644 --- a/dbms/programs/extract-from-config/CMakeLists.txt +++ b/dbms/programs/extract-from-config/CMakeLists.txt @@ -1,7 +1,7 @@ add_library (clickhouse-extract-from-config-lib ${LINK_MODE} ExtractFromConfig.cpp) -target_link_libraries (clickhouse-extract-from-config-lib clickhouse_common_config clickhouse_common_io ${Boost_PROGRAM_OPTIONS_LIBRARY}) +target_link_libraries (clickhouse-extract-from-config-lib PRIVATE clickhouse_common_config clickhouse_common_io ${Boost_PROGRAM_OPTIONS_LIBRARY}) if (CLICKHOUSE_SPLIT_BINARY) add_executable (clickhouse-extract-from-config clickhouse-extract-from-config.cpp) - target_link_libraries (clickhouse-extract-from-config clickhouse-extract-from-config-lib) + target_link_libraries (clickhouse-extract-from-config PRIVATE clickhouse-extract-from-config-lib) endif () diff --git a/dbms/programs/format/CMakeLists.txt b/dbms/programs/format/CMakeLists.txt index f53cd11bee..53d09e8262 100644 --- a/dbms/programs/format/CMakeLists.txt +++ b/dbms/programs/format/CMakeLists.txt @@ -1,6 +1,6 @@ add_library (clickhouse-format-lib ${LINK_MODE} Format.cpp) -target_link_libraries (clickhouse-format-lib dbms clickhouse_common_io ${Boost_PROGRAM_OPTIONS_LIBRARY}) +target_link_libraries (clickhouse-format-lib PRIVATE dbms clickhouse_common_io clickhouse_parsers ${Boost_PROGRAM_OPTIONS_LIBRARY}) if (CLICKHOUSE_SPLIT_BINARY) add_executable (clickhouse-format clickhouse-format.cpp) - target_link_libraries (clickhouse-format clickhouse-format-lib) + target_link_libraries (clickhouse-format PRIVATE clickhouse-format-lib) endif () diff --git a/dbms/programs/local/CMakeLists.txt b/dbms/programs/local/CMakeLists.txt index 9680ceaf1c..07729d6856 100644 --- a/dbms/programs/local/CMakeLists.txt +++ b/dbms/programs/local/CMakeLists.txt @@ -1,7 +1,7 @@ add_library (clickhouse-local-lib ${LINK_MODE} LocalServer.cpp) -target_link_libraries (clickhouse-local-lib clickhouse_common_io clickhouse-server-lib clickhouse_functions 
clickhouse_aggregate_functions clickhouse_table_functions ${Boost_PROGRAM_OPTIONS_LIBRARY}) +target_link_libraries (clickhouse-local-lib PRIVATE clickhouse_common_io clickhouse-server-lib clickhouse_functions clickhouse_aggregate_functions clickhouse_table_functions ${Boost_PROGRAM_OPTIONS_LIBRARY}) if (CLICKHOUSE_SPLIT_BINARY) add_executable (clickhouse-local clickhouse-local.cpp) - target_link_libraries (clickhouse-local clickhouse-local-lib) + target_link_libraries (clickhouse-local PRIVATE clickhouse-local-lib) endif () diff --git a/dbms/programs/obfuscator/CMakeLists.txt b/dbms/programs/obfuscator/CMakeLists.txt index 5ee6ace5a0..73c3f01e9c 100644 --- a/dbms/programs/obfuscator/CMakeLists.txt +++ b/dbms/programs/obfuscator/CMakeLists.txt @@ -1,8 +1,8 @@ add_library (clickhouse-obfuscator-lib ${LINK_MODE} Obfuscator.cpp) -target_link_libraries (clickhouse-obfuscator-lib dbms ${Boost_PROGRAM_OPTIONS_LIBRARY}) +target_link_libraries (clickhouse-obfuscator-lib PRIVATE dbms ${Boost_PROGRAM_OPTIONS_LIBRARY}) if (CLICKHOUSE_SPLIT_BINARY) add_executable (clickhouse-obfuscator clickhouse-obfuscator.cpp) set_target_properties(clickhouse-obfuscator PROPERTIES RUNTIME_OUTPUT_DIRECTORY ..) - target_link_libraries (clickhouse-obfuscator clickhouse-obfuscator-lib) + target_link_libraries (clickhouse-obfuscator PRIVATE clickhouse-obfuscator-lib) endif () diff --git a/dbms/programs/odbc-bridge/CMakeLists.txt b/dbms/programs/odbc-bridge/CMakeLists.txt index d13a2866e7..a57c8c9c8c 100644 --- a/dbms/programs/odbc-bridge/CMakeLists.txt +++ b/dbms/programs/odbc-bridge/CMakeLists.txt @@ -9,23 +9,23 @@ add_library (clickhouse-odbc-bridge-lib ${LINK_MODE} validateODBCConnectionString.cpp ) -target_link_libraries (clickhouse-odbc-bridge-lib clickhouse_common_io daemon dbms) +target_link_libraries (clickhouse-odbc-bridge-lib PRIVATE clickhouse_common_io daemon dbms) target_include_directories (clickhouse-odbc-bridge-lib PUBLIC ${ClickHouse_SOURCE_DIR}/libs/libdaemon/include) if (USE_POCO_SQLODBC) - target_link_libraries (clickhouse-odbc-bridge-lib ${Poco_SQLODBC_LIBRARY}) + target_link_libraries (clickhouse-odbc-bridge-lib PRIVATE ${Poco_SQLODBC_LIBRARY}) target_include_directories (clickhouse-odbc-bridge-lib SYSTEM PRIVATE ${ODBC_INCLUDE_DIRECTORIES} ${Poco_SQLODBC_INCLUDE_DIR}) endif () if (Poco_SQL_FOUND) - target_link_libraries (clickhouse-odbc-bridge-lib ${Poco_SQL_LIBRARY}) + target_link_libraries (clickhouse-odbc-bridge-lib PRIVATE ${Poco_SQL_LIBRARY}) endif () if (USE_POCO_DATAODBC) - target_link_libraries (clickhouse-odbc-bridge-lib ${Poco_DataODBC_LIBRARY}) + target_link_libraries (clickhouse-odbc-bridge-lib PRIVATE ${Poco_DataODBC_LIBRARY}) target_include_directories (clickhouse-odbc-bridge-lib SYSTEM PRIVATE ${ODBC_INCLUDE_DIRECTORIES} ${Poco_DataODBC_INCLUDE_DIR}) endif() if (Poco_Data_FOUND) - target_link_libraries (clickhouse-odbc-bridge-lib ${Poco_Data_LIBRARY}) + target_link_libraries (clickhouse-odbc-bridge-lib PRIVATE ${Poco_Data_LIBRARY}) endif () @@ -35,5 +35,5 @@ endif () if (CLICKHOUSE_SPLIT_BINARY) add_executable (clickhouse-odbc-bridge odbc-bridge.cpp) - target_link_libraries (clickhouse-odbc-bridge clickhouse-odbc-bridge-lib) + target_link_libraries (clickhouse-odbc-bridge PRIVATE clickhouse-odbc-bridge-lib) endif () diff --git a/dbms/programs/odbc-bridge/ColumnInfoHandler.cpp b/dbms/programs/odbc-bridge/ColumnInfoHandler.cpp index f59abd5f58..997ef9cf21 100644 --- a/dbms/programs/odbc-bridge/ColumnInfoHandler.cpp +++ b/dbms/programs/odbc-bridge/ColumnInfoHandler.cpp @@ -124,9 +124,9 
@@ void ODBCColumnsInfoHandler::handleRequest(Poco::Net::HTTPServerRequest & reques auto identifier_quote = getIdentifierQuote(hdbc); if (identifier_quote.length() == 0) settings.identifier_quoting_style = IdentifierQuotingStyle::None; - else if(identifier_quote[0] == '`') + else if (identifier_quote[0] == '`') settings.identifier_quoting_style = IdentifierQuotingStyle::Backticks; - else if(identifier_quote[0] == '"') + else if (identifier_quote[0] == '"') settings.identifier_quoting_style = IdentifierQuotingStyle::DoubleQuotes; else throw Exception("Can not map quote identifier '" + identifier_quote + "' to IdentifierQuotingStyle value", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); diff --git a/dbms/programs/odbc-bridge/HandlerFactory.cpp b/dbms/programs/odbc-bridge/HandlerFactory.cpp index 8a0ff06268..a6422db268 100644 --- a/dbms/programs/odbc-bridge/HandlerFactory.cpp +++ b/dbms/programs/odbc-bridge/HandlerFactory.cpp @@ -25,7 +25,7 @@ Poco::Net::HTTPRequestHandler * HandlerFactory::createRequestHandler(const Poco: #else return nullptr; #endif - else if(uri.getPath() == "/identifier_quote") + else if (uri.getPath() == "/identifier_quote") #if USE_POCO_SQLODBC || USE_POCO_DATAODBC return new IdentifierQuoteHandler(keep_alive_timeout, context); #else diff --git a/dbms/programs/odbc-bridge/tests/CMakeLists.txt b/dbms/programs/odbc-bridge/tests/CMakeLists.txt index 5240a91742..5211c39d11 100644 --- a/dbms/programs/odbc-bridge/tests/CMakeLists.txt +++ b/dbms/programs/odbc-bridge/tests/CMakeLists.txt @@ -1,2 +1,2 @@ add_executable (validate-odbc-connection-string validate-odbc-connection-string.cpp) -target_link_libraries (validate-odbc-connection-string clickhouse-odbc-bridge-lib) +target_link_libraries (validate-odbc-connection-string PRIVATE clickhouse-odbc-bridge-lib clickhouse_common_io) diff --git a/dbms/programs/performance-test/CMakeLists.txt b/dbms/programs/performance-test/CMakeLists.txt index 31796cd9d7..adad45025c 100644 --- a/dbms/programs/performance-test/CMakeLists.txt +++ b/dbms/programs/performance-test/CMakeLists.txt @@ -1,8 +1,8 @@ add_library (clickhouse-performance-test-lib ${LINK_MODE} PerformanceTest.cpp) -target_link_libraries (clickhouse-performance-test-lib clickhouse_common_io dbms ${Boost_PROGRAM_OPTIONS_LIBRARY}) +target_link_libraries (clickhouse-performance-test-lib PRIVATE dbms clickhouse_common_io ${Boost_PROGRAM_OPTIONS_LIBRARY}) target_include_directories (clickhouse-performance-test-lib SYSTEM PRIVATE ${PCG_RANDOM_INCLUDE_DIR}) if (CLICKHOUSE_SPLIT_BINARY) add_executable (clickhouse-performance-test clickhouse-performance-test.cpp) - target_link_libraries (clickhouse-performance-test clickhouse-performance-test-lib dbms) + target_link_libraries (clickhouse-performance-test PRIVATE clickhouse-performance-test-lib) endif () diff --git a/dbms/programs/performance-test/PerformanceTest.cpp b/dbms/programs/performance-test/PerformanceTest.cpp index e63d40e2db..ebebedd876 100644 --- a/dbms/programs/performance-test/PerformanceTest.cpp +++ b/dbms/programs/performance-test/PerformanceTest.cpp @@ -49,10 +49,10 @@ namespace DB { namespace ErrorCodes { - extern const int POCO_EXCEPTION; - extern const int STD_EXCEPTION; - extern const int UNKNOWN_EXCEPTION; extern const int NOT_IMPLEMENTED; + extern const int LOGICAL_ERROR; + extern const int BAD_ARGUMENTS; + extern const int FILE_DOESNT_EXIST; } static String pad(size_t padding) @@ -156,7 +156,7 @@ struct StopConditionsSet else if (key == "average_speed_not_changing_for_ms") average_speed_not_changing_for_ms.value = 
stop_conditions_view->getUInt64(key); else - throw DB::Exception("Met unkown stop condition: " + key); + throw DB::Exception("Met unkown stop condition: " + key, DB::ErrorCodes::LOGICAL_ERROR); ++initialized_count; } @@ -521,7 +521,7 @@ public: { if (input_files.size() < 1) { - throw DB::Exception("No tests were specified", 0); + throw DB::Exception("No tests were specified", DB::ErrorCodes::BAD_ARGUMENTS); } std::string name; @@ -694,7 +694,7 @@ private: size_t ram_size_needed = config->getUInt64("preconditions.ram_size"); size_t actual_ram = getMemoryAmount(); if (!actual_ram) - throw DB::Exception("ram_size precondition not available on this platform", ErrorCodes::NOT_IMPLEMENTED); + throw DB::Exception("ram_size precondition not available on this platform", DB::ErrorCodes::NOT_IMPLEMENTED); if (ram_size_needed > actual_ram) { @@ -868,12 +868,12 @@ private: if (!test_config->has("query") && !test_config->has("query_file")) { - throw DB::Exception("Missing query fields in test's config: " + test_name); + throw DB::Exception("Missing query fields in test's config: " + test_name, DB::ErrorCodes::BAD_ARGUMENTS); } if (test_config->has("query") && test_config->has("query_file")) { - throw DB::Exception("Found both query and query_file fields. Choose only one"); + throw DB::Exception("Found both query and query_file fields. Choose only one", DB::ErrorCodes::BAD_ARGUMENTS); } if (test_config->has("query")) @@ -885,7 +885,7 @@ private: { const String filename = test_config->getString("query_file"); if (filename.empty()) - throw DB::Exception("Empty file name"); + throw DB::Exception("Empty file name", DB::ErrorCodes::BAD_ARGUMENTS); bool tsv = fs::path(filename).extension().string() == ".tsv"; @@ -909,7 +909,7 @@ private: if (queries.empty()) { - throw DB::Exception("Did not find any query to execute: " + test_name); + throw DB::Exception("Did not find any query to execute: " + test_name, DB::ErrorCodes::BAD_ARGUMENTS); } if (test_config->has("substitutions")) @@ -929,7 +929,7 @@ private: if (!test_config->has("type")) { - throw DB::Exception("Missing type property in config: " + test_name); + throw DB::Exception("Missing type property in config: " + test_name, DB::ErrorCodes::BAD_ARGUMENTS); } String config_exec_type = test_config->getString("type"); @@ -938,7 +938,7 @@ private: else if (config_exec_type == "once") exec_type = ExecutionType::Once; else - throw DB::Exception("Unknown type " + config_exec_type + " in :" + test_name); + throw DB::Exception("Unknown type " + config_exec_type + " in :" + test_name, DB::ErrorCodes::BAD_ARGUMENTS); times_to_run = test_config->getUInt("times_to_run", 1); @@ -951,7 +951,7 @@ private: } if (stop_conditions_template.empty()) - throw DB::Exception("No termination conditions were found in config"); + throw DB::Exception("No termination conditions were found in config", DB::ErrorCodes::BAD_ARGUMENTS); for (size_t i = 0; i < times_to_run * queries.size(); ++i) stop_conditions_by_run.push_back(stop_conditions_template); @@ -978,7 +978,7 @@ private: else { if (lite_output) - throw DB::Exception("Specify main_metric for lite output"); + throw DB::Exception("Specify main_metric for lite output", DB::ErrorCodes::BAD_ARGUMENTS); } if (metrics.size() > 0) @@ -1023,22 +1023,14 @@ private: if (exec_type == ExecutionType::Loop) { for (const String & metric : metrics) - { if (std::find(non_loop_metrics.begin(), non_loop_metrics.end(), metric) != non_loop_metrics.end()) - { - throw DB::Exception("Wrong type of metric for loop execution type (" + metric + ")"); - } - } + 
throw DB::Exception("Wrong type of metric for loop execution type (" + metric + ")", DB::ErrorCodes::BAD_ARGUMENTS); } else { for (const String & metric : metrics) - { if (std::find(loop_metrics.begin(), loop_metrics.end(), metric) != loop_metrics.end()) - { - throw DB::Exception("Wrong type of metric for non-loop execution type (" + metric + ")"); - } - } + throw DB::Exception("Wrong type of metric for non-loop execution type (" + metric + ")", DB::ErrorCodes::BAD_ARGUMENTS); } } @@ -1439,7 +1431,7 @@ try if (input_files.empty()) { std::cerr << std::endl; - throw DB::Exception("Did not find any xml files"); + throw DB::Exception("Did not find any xml files", DB::ErrorCodes::BAD_ARGUMENTS); } else std::cerr << " found " << input_files.size() << " files." << std::endl; @@ -1454,7 +1446,7 @@ try fs::path file(filename); if (!fs::exists(file)) - throw DB::Exception("File '" + filename + "' does not exist"); + throw DB::Exception("File '" + filename + "' does not exist", DB::ErrorCodes::FILE_DOESNT_EXIST); if (fs::is_directory(file)) { @@ -1463,7 +1455,7 @@ try else { if (file.extension().string() != ".xml") - throw DB::Exception("File '" + filename + "' does not have .xml extension"); + throw DB::Exception("File '" + filename + "' does not have .xml extension", DB::ErrorCodes::BAD_ARGUMENTS); collected_files.push_back(filename); } } diff --git a/dbms/programs/server/CMakeLists.txt b/dbms/programs/server/CMakeLists.txt index bc6683d6e9..d8caa07b74 100644 --- a/dbms/programs/server/CMakeLists.txt +++ b/dbms/programs/server/CMakeLists.txt @@ -10,12 +10,16 @@ add_library (clickhouse-server-lib ${LINK_MODE} TCPHandler.cpp ) -target_link_libraries (clickhouse-server-lib clickhouse_common_io daemon clickhouse_storages_system clickhouse_functions clickhouse_aggregate_functions clickhouse_table_functions) +target_link_libraries (clickhouse-server-lib PRIVATE clickhouse_common_io daemon clickhouse_storages_system clickhouse_functions clickhouse_aggregate_functions clickhouse_table_functions ${Poco_Net_LIBRARY}) +if (USE_POCO_NETSSL) + target_link_libraries (clickhouse-server-lib PRIVATE ${Poco_NetSSL_LIBRARY} ${Poco_Crypto_LIBRARY}) +endif () + target_include_directories (clickhouse-server-lib PUBLIC ${ClickHouse_SOURCE_DIR}/libs/libdaemon/include) if (CLICKHOUSE_SPLIT_BINARY) add_executable (clickhouse-server clickhouse-server.cpp) - target_link_libraries (clickhouse-server clickhouse-server-lib) + target_link_libraries (clickhouse-server PRIVATE clickhouse-server-lib) install (TARGETS clickhouse-server ${CLICKHOUSE_ALL_TARGETS} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) endif () diff --git a/dbms/programs/server/HTTPHandler.cpp b/dbms/programs/server/HTTPHandler.cpp index 9d9324b9a3..34037a7c7c 100644 --- a/dbms/programs/server/HTTPHandler.cpp +++ b/dbms/programs/server/HTTPHandler.cpp @@ -270,7 +270,6 @@ void HTTPHandler::processQuery( std::string query_id = params.get("query_id", ""); context.setUser(user, password, request.clientAddress(), quota_key); context.setCurrentQueryId(query_id); - CurrentThread::attachQueryContext(context); /// The user could specify session identifier and session timeout. /// It allows to modify settings, create temporary tables and reuse them in subsequent requests. 
diff --git a/dbms/programs/server/InterserverIOHTTPHandler.cpp b/dbms/programs/server/InterserverIOHTTPHandler.cpp index 3895b2d899..3c93ee1989 100644 --- a/dbms/programs/server/InterserverIOHTTPHandler.cpp +++ b/dbms/programs/server/InterserverIOHTTPHandler.cpp @@ -19,9 +19,6 @@ namespace DB namespace ErrorCodes { extern const int ABORTED; - extern const int POCO_EXCEPTION; - extern const int STD_EXCEPTION; - extern const int UNKNOWN_EXCEPTION; extern const int TOO_MANY_SIMULTANEOUS_QUERIES; } diff --git a/dbms/programs/server/Server.cpp b/dbms/programs/server/Server.cpp index 0937cf4b84..b80a9c7073 100644 --- a/dbms/programs/server/Server.cpp +++ b/dbms/programs/server/Server.cpp @@ -55,6 +55,7 @@ namespace CurrentMetrics { extern const Metric Revision; + extern const Metric VersionInteger; } namespace DB @@ -66,6 +67,8 @@ namespace ErrorCodes extern const int SUPPORT_IS_DISABLED; extern const int ARGUMENT_OUT_OF_BOUND; extern const int EXCESSIVE_ELEMENT_IN_CONFIG; + extern const int INVALID_CONFIG_PARAMETER; + extern const int SYSTEM_ERROR; } @@ -73,7 +76,7 @@ static std::string getCanonicalPath(std::string && path) { Poco::trimInPlace(path); if (path.empty()) - throw Exception("path configuration parameter is empty"); + throw Exception("path configuration parameter is empty", ErrorCodes::INVALID_CONFIG_PARAMETER); if (path.back() != '/') path += '/'; return std::move(path); @@ -108,6 +111,7 @@ int Server::main(const std::vector & /*args*/) registerStorages(); CurrentMetrics::set(CurrentMetrics::Revision, ClickHouseRevision::get()); + CurrentMetrics::set(CurrentMetrics::VersionInteger, ClickHouseRevision::getVersionInteger()); /** Context contains all that query execution is dependent: * settings, available functions, data types, aggregate functions, databases... @@ -141,7 +145,7 @@ int Server::main(const std::vector & /*args*/) { LOG_TRACE(log, "Will mlockall to prevent executable memory from being paged out. It may take a few seconds."); if (0 != mlockall(MCL_CURRENT)) - LOG_WARNING(log, "Failed mlockall: " + errnoToString()); + LOG_WARNING(log, "Failed mlockall: " + errnoToString(ErrorCodes::SYSTEM_ERROR)); else LOG_TRACE(log, "The memory map of clickhouse executable has been mlock'ed"); } diff --git a/dbms/programs/server/TCPHandler.cpp b/dbms/programs/server/TCPHandler.cpp index 5c9b3a2d86..63e60d22c0 100644 --- a/dbms/programs/server/TCPHandler.cpp +++ b/dbms/programs/server/TCPHandler.cpp @@ -718,7 +718,7 @@ bool TCPHandler::receiveData() { NamesAndTypesList columns = block.getNamesAndTypesList(); storage = StorageMemory::create(external_table_name, - ColumnsDescription{columns, NamesAndTypesList{}, NamesAndTypesList{}, ColumnDefaults{}}); + ColumnsDescription{columns, NamesAndTypesList{}, NamesAndTypesList{}, ColumnDefaults{}, ColumnComments{}}); storage->startup(); query_context.addExternalTable(external_table_name, storage); } diff --git a/dbms/programs/server/config.xml b/dbms/programs/server/config.xml index 051f6f7fb2..514a081eac 100644 --- a/dbms/programs/server/config.xml +++ b/dbms/programs/server/config.xml @@ -164,6 +164,20 @@ + + + + localhost + 9000 + + + + + localhost + 9000 + + + diff --git a/dbms/programs/server/users.xml b/dbms/programs/server/users.xml index 6f746baf2a..979b2d3fc1 100644 --- a/dbms/programs/server/users.xml +++ b/dbms/programs/server/users.xml @@ -56,7 +56,7 @@ Each element of list has one of the following forms: IP-address or network mask. 
Examples: 213.180.204.3 or 10.0.0.1/8 or 10.0.0.1/255.255.255.0 - 2a02:6b8::3 or 2a02:6b8::3/64 or 2a02:6b8::3/ffff:ffff:ffff:ffff::. + 2a02:6b8::3 or 2a02:6b8::3/64 or 2a02:6b8::3/ffff:ffff:ffff:ffff::. Hostname. Example: server01.yandex.ru. To check access, DNS query is performed, and all received addresses compared to peer address. Regular expression for host names. Example, ^server\d\d-\d\d-\d\.yandex\.ru$ diff --git a/dbms/scripts/gen-bias-data.py b/dbms/scripts/gen-bias-data.py index 7edc9948e7..034cfcca7d 100755 --- a/dbms/scripts/gen-bias-data.py +++ b/dbms/scripts/gen-bias-data.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3.4 +#!/usr/bin/python3 # -*- coding: utf-8 -*- import sys diff --git a/dbms/src/AggregateFunctions/AggregateFunctionAvg.cpp b/dbms/src/AggregateFunctions/AggregateFunctionAvg.cpp index 36b29796b9..565f1f0c33 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionAvg.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionAvg.cpp @@ -12,7 +12,7 @@ namespace template struct Avg { - using FieldType = std::conditional_t, Decimal128, typename NearestFieldType::Type>; + using FieldType = std::conditional_t, Decimal128, NearestFieldType>; using Function = AggregateFunctionAvg>; }; diff --git a/dbms/src/AggregateFunctions/AggregateFunctionSum.cpp b/dbms/src/AggregateFunctions/AggregateFunctionSum.cpp index 45a97d2bc8..f21c60eeae 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionSum.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionSum.cpp @@ -14,7 +14,7 @@ template struct SumSimple { /// @note It uses slow Decimal128 (cause we need such a variant). sumWithOverflow is faster for Decimal32/64 - using ResultType = std::conditional_t, Decimal128, typename NearestFieldType::Type>; + using ResultType = std::conditional_t, Decimal128, NearestFieldType>; using AggregateDataType = AggregateFunctionSumData; using Function = AggregateFunctionSum; }; diff --git a/dbms/src/AggregateFunctions/AggregateFunctionSumMap.h b/dbms/src/AggregateFunctions/AggregateFunctionSumMap.h index e89c0bf841..b79670eb3d 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionSumMap.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionSumMap.h @@ -52,7 +52,7 @@ struct AggregateFunctionSumMapData template class AggregateFunctionSumMap final : public IAggregateFunctionDataHelper< - AggregateFunctionSumMapData::Type>, AggregateFunctionSumMap> + AggregateFunctionSumMapData>, AggregateFunctionSumMap> { private: using ColVecType = std::conditional_t, ColumnDecimal, ColumnVector>; diff --git a/dbms/src/AggregateFunctions/AggregateFunctionUniq.cpp b/dbms/src/AggregateFunctions/AggregateFunctionUniq.cpp index 77b6c9cfb9..6b63a719b8 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionUniq.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionUniq.cpp @@ -130,9 +130,6 @@ void registerAggregateFunctionsUniq(AggregateFunctionFactory & factory) factory.registerFunction("uniqExact", createAggregateFunctionUniq>); - - factory.registerFunction("uniqCombined", - createAggregateFunctionUniq>); } } diff --git a/dbms/src/AggregateFunctions/AggregateFunctionUniq.h b/dbms/src/AggregateFunctions/AggregateFunctionUniq.h index 140928959a..980d62b40e 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionUniq.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionUniq.h @@ -22,7 +22,6 @@ #include #include -#include #include @@ -124,46 +123,6 @@ struct AggregateFunctionUniqExactData static String getName() { return "uniqExact"; } }; -template -struct AggregateFunctionUniqCombinedData -{ - using Key = UInt32; - 
using Set = CombinedCardinalityEstimator< - Key, - HashSet>, - 16, - 14, - 17, - TrivialHash, - UInt32, - HyperLogLogBiasEstimator, - HyperLogLogMode::FullFeatured>; - - Set set; - - static String getName() { return "uniqCombined"; } -}; - -template <> -struct AggregateFunctionUniqCombinedData -{ - using Key = UInt64; - using Set = CombinedCardinalityEstimator< - Key, - HashSet>, - 16, - 14, - 17, - TrivialHash, - UInt64, - HyperLogLogBiasEstimator, - HyperLogLogMode::FullFeatured>; - - Set set; - - static String getName() { return "uniqCombined"; } -}; - namespace detail { @@ -199,39 +158,6 @@ template <> struct AggregateFunctionUniqTraits } }; -/** Hash function for uniqCombined. - */ -template struct AggregateFunctionUniqCombinedTraits -{ - static UInt32 hash(T x) { return static_cast(intHash64(x)); } -}; - -template <> struct AggregateFunctionUniqCombinedTraits -{ - static UInt32 hash(UInt128 x) - { - return sipHash64(x); - } -}; - -template <> struct AggregateFunctionUniqCombinedTraits -{ - static UInt32 hash(Float32 x) - { - UInt64 res = ext::bit_cast(x); - return static_cast(intHash64(res)); - } -}; - -template <> struct AggregateFunctionUniqCombinedTraits -{ - static UInt32 hash(Float64 x) - { - UInt64 res = ext::bit_cast(x); - return static_cast(intHash64(res)); - } -}; - /** The structure for the delegation work to add one element to the `uniq` aggregate functions. * Used for partial specialization to add strings. @@ -255,19 +181,6 @@ struct OneAdder data.set.insert(CityHash_v1_0_2::CityHash64(value.data, value.size)); } } - else if constexpr (std::is_same_v>) - { - if constexpr (!std::is_same_v) - { - const auto & value = static_cast &>(column).getData()[row_num]; - data.set.insert(AggregateFunctionUniqCombinedTraits::hash(value)); - } - else - { - StringRef value = column.getDataAt(row_num); - data.set.insert(CityHash_v1_0_2::CityHash64(value.data, value.size)); - } - } else if constexpr (std::is_same_v>) { if constexpr (!std::is_same_v) @@ -387,5 +300,4 @@ public: const char * getHeaderFilePath() const override { return __FILE__; } }; - } diff --git a/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp b/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp new file mode 100644 index 0000000000..90b84d3b92 --- /dev/null +++ b/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp @@ -0,0 +1,127 @@ +#include + +#include +#include + +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int ARGUMENT_OUT_OF_BOUND; +} + +namespace +{ + template + struct WithK + { + template + using AggregateFunction = AggregateFunctionUniqCombined; + + template + using AggregateFunctionVariadic = AggregateFunctionUniqCombinedVariadic; + }; + + template + AggregateFunctionPtr createAggregateFunctionWithK(const DataTypes & argument_types) + { + /// We use exact hash function if the arguments are not contiguous in memory, because only exact hash function has support for this case. 
+ bool use_exact_hash_function = !isAllArgumentsContiguousInMemory(argument_types); + + if (argument_types.size() == 1) + { + const IDataType & argument_type = *argument_types[0]; + + AggregateFunctionPtr res(createWithNumericType::template AggregateFunction>(*argument_types[0])); + + WhichDataType which(argument_type); + if (res) + return res; + else if (which.isDate()) + return std::make_shared::template AggregateFunction>(); + else if (which.isDateTime()) + return std::make_shared::template AggregateFunction>(); + else if (which.isStringOrFixedString()) + return std::make_shared::template AggregateFunction>(); + else if (which.isUUID()) + return std::make_shared::template AggregateFunction>(); + else if (which.isTuple()) + { + if (use_exact_hash_function) + return std::make_shared::template AggregateFunctionVariadic>(argument_types); + else + return std::make_shared::template AggregateFunctionVariadic>(argument_types); + } + } + + /// "Variadic" method also works as a fallback generic case for a single argument. + if (use_exact_hash_function) + return std::make_shared::template AggregateFunctionVariadic>(argument_types); + else + return std::make_shared::template AggregateFunctionVariadic>(argument_types); + } + + AggregateFunctionPtr createAggregateFunctionUniqCombined( + const std::string & name, const DataTypes & argument_types, const Array & params) + { + /// log2 of the number of cells in HyperLogLog. + /// Reasonable default value, selected to be comparable in quality with "uniq" aggregate function. + UInt8 precision = 17; + + if (!params.empty()) + { + if (params.size() != 1) + throw Exception( + "Aggregate function " + name + " requires one parameter or less.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + UInt64 precision_param = applyVisitor(FieldVisitorConvertToNumber(), params[0]); + + // This range is hardcoded below + if (precision_param > 20 || precision_param < 12) + throw Exception( + "Parameter for aggregate function " + name + "is out or range: [12, 20].", ErrorCodes::ARGUMENT_OUT_OF_BOUND); + + precision = precision_param; + } + + if (argument_types.empty()) + throw Exception("Incorrect number of arguments for aggregate function " + name, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + switch (precision) + { + case 12: + return createAggregateFunctionWithK<12>(argument_types); + case 13: + return createAggregateFunctionWithK<13>(argument_types); + case 14: + return createAggregateFunctionWithK<14>(argument_types); + case 15: + return createAggregateFunctionWithK<15>(argument_types); + case 16: + return createAggregateFunctionWithK<16>(argument_types); + case 17: + return createAggregateFunctionWithK<17>(argument_types); + case 18: + return createAggregateFunctionWithK<18>(argument_types); + case 19: + return createAggregateFunctionWithK<19>(argument_types); + case 20: + return createAggregateFunctionWithK<20>(argument_types); + } + + __builtin_unreachable(); + } + +} // namespace + +void registerAggregateFunctionUniqCombined(AggregateFunctionFactory & factory) +{ + factory.registerFunction("uniqCombined", createAggregateFunctionUniqCombined); +} + +} // namespace DB diff --git a/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.h b/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.h new file mode 100644 index 0000000000..99cc3a9389 --- /dev/null +++ b/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.h @@ -0,0 +1,229 @@ +#pragma once + +#include + +#include +#include +#include + +#include +#include +#include + +#include + +#include 
+#include +#include +#include + +#include + +namespace DB +{ +namespace detail +{ + /** Hash function for uniqCombined. + */ + template + struct AggregateFunctionUniqCombinedTraits + { + static UInt32 hash(T x) + { + return static_cast(intHash64(x)); + } + }; + + template <> + struct AggregateFunctionUniqCombinedTraits + { + static UInt32 hash(UInt128 x) + { + return sipHash64(x); + } + }; + + template <> + struct AggregateFunctionUniqCombinedTraits + { + static UInt32 hash(Float32 x) + { + UInt64 res = ext::bit_cast(x); + return static_cast(intHash64(res)); + } + }; + + template <> + struct AggregateFunctionUniqCombinedTraits + { + static UInt32 hash(Float64 x) + { + UInt64 res = ext::bit_cast(x); + return static_cast(intHash64(res)); + } + }; + +} // namespace detail + + +template +struct AggregateFunctionUniqCombinedDataWithKey +{ + // TODO(ilezhankin): pre-generate values for |UniqCombinedBiasData|, + // at the moment gen-bias-data.py script doesn't work. + + // We want to migrate from |HashSet| to |HyperLogLogCounter| when the sizes in memory become almost equal. + // The size per element in |HashSet| is sizeof(Key)*2 bytes, and the overall size of |HyperLogLogCounter| is 2^K * 6 bits. + // For Key=UInt32 we can calculate: 2^X * 4 * 2 ≤ 2^(K-3) * 6 ⇒ X ≤ K-4. + using Set = CombinedCardinalityEstimator>, 16, K - 4, K, TrivialHash, Key>; + + Set set; +}; + +template +struct AggregateFunctionUniqCombinedDataWithKey +{ + using Set = CombinedCardinalityEstimator>, + 16, + 13, + 17, + TrivialHash, + Key, + HyperLogLogBiasEstimator, + HyperLogLogMode::FullFeatured>; + + Set set; +}; + + +template +struct AggregateFunctionUniqCombinedData : public AggregateFunctionUniqCombinedDataWithKey +{ +}; + + +template +struct AggregateFunctionUniqCombinedData : public AggregateFunctionUniqCombinedDataWithKey +{ +}; + + +template +class AggregateFunctionUniqCombined final + : public IAggregateFunctionDataHelper, AggregateFunctionUniqCombined> +{ +public: + String getName() const override + { + return "uniqCombined"; + } + + DataTypePtr getReturnType() const override + { + return std::make_shared(); + } + + void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override + { + if constexpr (!std::is_same_v) + { + const auto & value = static_cast &>(*columns[0]).getData()[row_num]; + this->data(place).set.insert(detail::AggregateFunctionUniqCombinedTraits::hash(value)); + } + else + { + StringRef value = columns[0]->getDataAt(row_num); + this->data(place).set.insert(CityHash_v1_0_2::CityHash64(value.data, value.size)); + } + } + + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override + { + this->data(place).set.merge(this->data(rhs).set); + } + + void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override + { + this->data(place).set.write(buf); + } + + void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override + { + this->data(place).set.read(buf); + } + + void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override + { + static_cast(to).getData().push_back(this->data(place).set.size()); + } + + const char * getHeaderFilePath() const override + { + return __FILE__; + } +}; + +/** For multiple arguments. To compute, hashes them. + * You can pass multiple arguments as is; You can also pass one argument - a tuple. + * But (for the possibility of efficient implementation), you can not pass several arguments, among which there are tuples. 
+ */ +template +class AggregateFunctionUniqCombinedVariadic final : public IAggregateFunctionDataHelper, + AggregateFunctionUniqCombinedVariadic> +{ +private: + size_t num_args = 0; + +public: + explicit AggregateFunctionUniqCombinedVariadic(const DataTypes & arguments) + { + if (argument_is_tuple) + num_args = typeid_cast(*arguments[0]).getElements().size(); + else + num_args = arguments.size(); + } + + String getName() const override + { + return "uniqCombined"; + } + + DataTypePtr getReturnType() const override + { + return std::make_shared(); + } + + void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override + { + this->data(place).set.insert(typename AggregateFunctionUniqCombinedData::Set::value_type( + UniqVariadicHash::apply(num_args, columns, row_num))); + } + + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override + { + this->data(place).set.merge(this->data(rhs).set); + } + + void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override + { + this->data(place).set.write(buf); + } + + void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override + { + this->data(place).set.read(buf); + } + + void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override + { + static_cast(to).getData().push_back(this->data(place).set.size()); + } + + const char * getHeaderFilePath() const override + { + return __FILE__; + } +}; + +} // namespace DB diff --git a/dbms/src/AggregateFunctions/AggregateFunctionWindowFunnel.h b/dbms/src/AggregateFunctions/AggregateFunctionWindowFunnel.h index c9a4e6b32a..317637b1b6 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionWindowFunnel.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionWindowFunnel.h @@ -16,10 +16,12 @@ namespace DB { + namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; } struct ComparePairFirst final @@ -191,7 +193,7 @@ public: const auto time_arg = arguments.front().get(); if (!WhichDataType(time_arg).isDateTime() && !WhichDataType(time_arg).isUInt32()) throw Exception{"Illegal type " + time_arg->getName() + " of first argument of aggregate function " + getName() - + ", must be DateTime or UInt32"}; + + ", must be DateTime or UInt32", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; for (const auto i : ext::range(1, arguments.size())) { diff --git a/dbms/src/AggregateFunctions/CMakeLists.txt b/dbms/src/AggregateFunctions/CMakeLists.txt index ef2665d0d3..56cc66d391 100644 --- a/dbms/src/AggregateFunctions/CMakeLists.txt +++ b/dbms/src/AggregateFunctions/CMakeLists.txt @@ -20,5 +20,5 @@ list(REMOVE_ITEM clickhouse_aggregate_functions_headers ) add_library(clickhouse_aggregate_functions ${LINK_MODE} ${clickhouse_aggregate_functions_sources}) -target_link_libraries(clickhouse_aggregate_functions dbms) +target_link_libraries(clickhouse_aggregate_functions PRIVATE dbms) target_include_directories (clickhouse_aggregate_functions BEFORE PRIVATE ${COMMON_INCLUDE_DIR}) diff --git a/dbms/src/AggregateFunctions/registerAggregateFunctions.cpp b/dbms/src/AggregateFunctions/registerAggregateFunctions.cpp index 3517ad57a7..800beda1d5 100644 --- a/dbms/src/AggregateFunctions/registerAggregateFunctions.cpp +++ b/dbms/src/AggregateFunctions/registerAggregateFunctions.cpp @@ -21,6 +21,7 @@ void registerAggregateFunctionsStatisticsSimple(AggregateFunctionFactory &); void registerAggregateFunctionSum(AggregateFunctionFactory 
&); void registerAggregateFunctionSumMap(AggregateFunctionFactory &); void registerAggregateFunctionsUniq(AggregateFunctionFactory &); +void registerAggregateFunctionUniqCombined(AggregateFunctionFactory &); void registerAggregateFunctionUniqUpTo(AggregateFunctionFactory &); void registerAggregateFunctionTopK(AggregateFunctionFactory &); void registerAggregateFunctionsBitwise(AggregateFunctionFactory &); @@ -55,6 +56,7 @@ void registerAggregateFunctions() registerAggregateFunctionSum(factory); registerAggregateFunctionSumMap(factory); registerAggregateFunctionsUniq(factory); + registerAggregateFunctionUniqCombined(factory); registerAggregateFunctionUniqUpTo(factory); registerAggregateFunctionTopK(factory); registerAggregateFunctionsBitwise(factory); diff --git a/dbms/src/Client/Connection.cpp b/dbms/src/Client/Connection.cpp index ce6246fba3..07d2ca05f2 100644 --- a/dbms/src/Client/Connection.cpp +++ b/dbms/src/Client/Connection.cpp @@ -108,14 +108,14 @@ void Connection::connect() disconnect(); /// Add server address to exception. Also Exception will remember stack trace. It's a pity that more precise exception type is lost. - throw NetException(e.displayText(), "(" + getDescription() + ")", ErrorCodes::NETWORK_ERROR); + throw NetException(e.displayText() + " (" + getDescription() + ")", ErrorCodes::NETWORK_ERROR); } catch (Poco::TimeoutException & e) { disconnect(); /// Add server address to exception. Also Exception will remember stack trace. It's a pity that more precise exception type is lost. - throw NetException(e.displayText(), "(" + getDescription() + ")", ErrorCodes::SOCKET_TIMEOUT); + throw NetException(e.displayText() + " (" + getDescription() + ")", ErrorCodes::SOCKET_TIMEOUT); } } diff --git a/dbms/src/Columns/ColumnConst.h b/dbms/src/Columns/ColumnConst.h index c9038cdf70..248bb04a18 100644 --- a/dbms/src/Columns/ColumnConst.h +++ b/dbms/src/Columns/ColumnConst.h @@ -209,7 +209,7 @@ public: Field getField() const { return getDataColumn()[0]; } template - T getValue() const { return getField().safeGet::Type>(); } + T getValue() const { return getField().safeGet>(); } }; } diff --git a/dbms/src/Columns/ColumnDecimal.cpp b/dbms/src/Columns/ColumnDecimal.cpp index 092ee74d66..e2cb798360 100644 --- a/dbms/src/Columns/ColumnDecimal.cpp +++ b/dbms/src/Columns/ColumnDecimal.cpp @@ -213,8 +213,8 @@ void ColumnDecimal::getExtremes(Field & min, Field & max) const { if (data.size() == 0) { - min = typename NearestFieldType::Type(0, scale); - max = typename NearestFieldType::Type(0, scale); + min = NearestFieldType(0, scale); + max = NearestFieldType(0, scale); return; } @@ -229,8 +229,8 @@ void ColumnDecimal::getExtremes(Field & min, Field & max) const cur_max = x; } - min = typename NearestFieldType::Type(cur_min, scale); - max = typename NearestFieldType::Type(cur_max, scale); + min = NearestFieldType(cur_min, scale); + max = NearestFieldType(cur_max, scale); } template class ColumnDecimal; diff --git a/dbms/src/Columns/ColumnDecimal.h b/dbms/src/Columns/ColumnDecimal.h index 523064167d..f6dee8877b 100644 --- a/dbms/src/Columns/ColumnDecimal.h +++ b/dbms/src/Columns/ColumnDecimal.h @@ -91,7 +91,7 @@ public: void insertFrom(const IColumn & src, size_t n) override { data.push_back(static_cast(src).getData()[n]); } void insertData(const char * pos, size_t /*length*/) override; void insertDefault() override { data.push_back(T()); } - void insert(const Field & x) override { data.push_back(DB::get::Type>(x)); } + void insert(const Field & x) override { data.push_back(DB::get>(x)); } void 
insertRangeFrom(const IColumn & src, size_t start, size_t length) override; void popBack(size_t n) override { data.resize_assume_reserved(data.size() - n); } diff --git a/dbms/src/Columns/ColumnFunction.cpp b/dbms/src/Columns/ColumnFunction.cpp index 852f6f03ad..8bc239eb65 100644 --- a/dbms/src/Columns/ColumnFunction.cpp +++ b/dbms/src/Columns/ColumnFunction.cpp @@ -183,7 +183,7 @@ void ColumnFunction::appendArgument(const ColumnWithTypeAndName & column) auto index = captured_columns.size(); if (!column.type->equals(*argumnet_types[index])) throw Exception("Cannot capture column " + std::to_string(argumnet_types.size()) + - "because it has incompatible type: got " + column.type->getName() + + " because it has incompatible type: got " + column.type->getName() + ", but " + argumnet_types[index]->getName() + " is expected.", ErrorCodes::LOGICAL_ERROR); captured_columns.push_back(column); diff --git a/dbms/src/Columns/ColumnUnique.h b/dbms/src/Columns/ColumnUnique.h index be5d71c3e2..7fb01620fb 100644 --- a/dbms/src/Columns/ColumnUnique.h +++ b/dbms/src/Columns/ColumnUnique.h @@ -81,12 +81,14 @@ public: { return column_holder->allocatedBytes() + index.allocatedBytes() - + (cached_null_mask ? cached_null_mask->allocatedBytes() : 0); + + (nested_null_mask ? nested_null_mask->allocatedBytes() : 0); } void forEachSubcolumn(IColumn::ColumnCallback callback) override { callback(column_holder); index.setColumn(getRawColumnPtr()); + if (is_nullable) + nested_column_nullable = ColumnNullable::create(column_holder, nested_null_mask); } const UInt64 * tryGetSavedHash() const override { return index.tryGetSavedHash(); } @@ -100,8 +102,8 @@ private: ReverseIndex index; /// For DataTypeNullable, stores null map. - mutable ColumnPtr cached_null_mask; - mutable ColumnPtr cached_column_nullable; + ColumnPtr nested_null_mask; + ColumnPtr nested_column_nullable; class IncrementalHash { @@ -118,6 +120,9 @@ private: mutable IncrementalHash hash; + void createNullMask(); + void updateNullMask(); + static size_t numSpecialValues(bool is_nullable) { return is_nullable ? 2 : 1; } size_t numSpecialValues() const { return numSpecialValues(is_nullable); } @@ -148,6 +153,7 @@ ColumnUnique::ColumnUnique(const ColumnUnique & other) , index(numSpecialValues(is_nullable), 0) { index.setColumn(getRawColumnPtr()); + createNullMask(); } template @@ -158,6 +164,7 @@ ColumnUnique::ColumnUnique(const IDataType & type) const auto & holder_type = is_nullable ? 
*static_cast(type).getNestedType() : type; column_holder = holder_type.createColumn()->cloneResized(numSpecialValues()); index.setColumn(getRawColumnPtr()); + createNullMask(); } template @@ -172,32 +179,51 @@ ColumnUnique::ColumnUnique(MutableColumnPtr && holder, bool is_nulla throw Exception("Holder column for ColumnUnique can't be nullable.", ErrorCodes::ILLEGAL_COLUMN); index.setColumn(getRawColumnPtr()); + createNullMask(); +} + +template +void ColumnUnique::createNullMask() +{ + if (is_nullable) + { + size_t size = getRawColumnPtr()->size(); + if (!nested_null_mask) + { + ColumnUInt8::MutablePtr null_mask = ColumnUInt8::create(size, UInt8(0)); + null_mask->getData()[getNullValueIndex()] = 1; + nested_null_mask = std::move(null_mask); + nested_column_nullable = ColumnNullable::create(column_holder, nested_null_mask); + } + else + throw Exception("Null mask for ColumnUnique is already created.", ErrorCodes::LOGICAL_ERROR); + } +} + +template +void ColumnUnique::updateNullMask() +{ + if (is_nullable) + { + if (!nested_null_mask) + throw Exception("Null mask for ColumnUnique is was not created.", ErrorCodes::LOGICAL_ERROR); + + size_t size = getRawColumnPtr()->size(); + + if (nested_null_mask->size() != size) + { + IColumn & null_mask = nested_null_mask->assumeMutableRef(); + static_cast(null_mask).getData().resize_fill(size); + } + } } template const ColumnPtr & ColumnUnique::getNestedColumn() const { if (is_nullable) - { - size_t size = getRawColumnPtr()->size(); - if (!cached_null_mask) - { - ColumnUInt8::MutablePtr null_mask = ColumnUInt8::create(size, UInt8(0)); - null_mask->getData()[getNullValueIndex()] = 1; - cached_null_mask = std::move(null_mask); - cached_column_nullable = ColumnNullable::create(column_holder, cached_null_mask); - } + return nested_column_nullable; - if (cached_null_mask->size() != size) - { - MutableColumnPtr null_mask = (*std::move(cached_null_mask)).mutate(); - static_cast(*null_mask).getData().resize_fill(size); - cached_null_mask = std::move(null_mask); - cached_column_nullable = ColumnNullable::create(column_holder, cached_null_mask); - } - - return cached_column_nullable; - } return column_holder; } @@ -227,6 +253,8 @@ size_t ColumnUnique::uniqueInsert(const Field & x) if (pos != prev_size) column->popBack(1); + updateNullMask(); + return pos; } @@ -260,6 +288,8 @@ size_t ColumnUnique::uniqueInsertData(const char * pos, size_t lengt index.insertFromLastRow(); } + updateNullMask(); + return insertion_point; } @@ -288,6 +318,8 @@ size_t ColumnUnique::uniqueInsertDataWithTerminatingZero(const char if (position != prev_size) column->popBack(1); + updateNullMask(); + return static_cast(position); } @@ -305,7 +337,7 @@ StringRef ColumnUnique::serializeValueIntoArena(size_t n, Arena & ar size_t nested_size = 0; - if (n == getNullValueIndex()) + if (n != getNullValueIndex()) nested_size = column_holder->serializeValueIntoArena(n, arena, begin).size; return StringRef(pos, sizeof(null_flag) + nested_size); @@ -343,6 +375,8 @@ size_t ColumnUnique::uniqueDeserializeAndInsertFromArena(const char if (index_pos != prev_size) column->popBack(1); + updateNullMask(); + return static_cast(index_pos); } @@ -533,6 +567,8 @@ MutableColumnPtr ColumnUnique::uniqueInsertRangeFrom(const IColumn & if (!positions_column) throw Exception("Can't find index type for ColumnUnique", ErrorCodes::LOGICAL_ERROR); + updateNullMask(); + return positions_column; } @@ -577,6 +613,8 @@ IColumnUnique::IndexesWithOverflow ColumnUnique::uniqueInsertRangeWi if (!positions_column) throw 
Exception("Can't find index type for ColumnUnique", ErrorCodes::LOGICAL_ERROR); + updateNullMask(); + IColumnUnique::IndexesWithOverflow indexes_with_overflow; indexes_with_overflow.indexes = std::move(positions_column); indexes_with_overflow.overflowed_keys = std::move(overflowed_keys); diff --git a/dbms/src/Columns/ColumnVector.cpp b/dbms/src/Columns/ColumnVector.cpp index d6fc07cd9d..e8aa9a756a 100644 --- a/dbms/src/Columns/ColumnVector.cpp +++ b/dbms/src/Columns/ColumnVector.cpp @@ -311,8 +311,8 @@ void ColumnVector::getExtremes(Field & min, Field & max) const cur_max = x; } - min = typename NearestFieldType::Type(cur_min); - max = typename NearestFieldType::Type(cur_max); + min = NearestFieldType(cur_min); + max = NearestFieldType(cur_max); } /// Explicit template instantiations - to avoid code bloat in headers. diff --git a/dbms/src/Columns/ColumnVector.h b/dbms/src/Columns/ColumnVector.h index c0c2020ba2..a2a947f421 100644 --- a/dbms/src/Columns/ColumnVector.h +++ b/dbms/src/Columns/ColumnVector.h @@ -244,7 +244,7 @@ public: void insert(const Field & x) override { - data.push_back(DB::get::Type>(x)); + data.push_back(DB::get>(x)); } void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; diff --git a/dbms/src/Columns/ReverseIndex.h b/dbms/src/Columns/ReverseIndex.h index cf73534316..a003e8282d 100644 --- a/dbms/src/Columns/ReverseIndex.h +++ b/dbms/src/Columns/ReverseIndex.h @@ -272,7 +272,7 @@ public: auto hash = calcHashes(); ptr = &hash->getData()[0]; UInt64 * expected = nullptr; - if(saved_hash_ptr.compare_exchange_strong(expected, ptr)) + if (saved_hash_ptr.compare_exchange_strong(expected, ptr)) saved_hash = std::move(hash); else ptr = expected; diff --git a/dbms/src/Common/ClickHouseRevision.cpp b/dbms/src/Common/ClickHouseRevision.cpp index ea48dcdc54..5513922a65 100644 --- a/dbms/src/Common/ClickHouseRevision.cpp +++ b/dbms/src/Common/ClickHouseRevision.cpp @@ -4,4 +4,5 @@ namespace ClickHouseRevision { unsigned get() { return VERSION_REVISION; } + unsigned getVersionInteger() { return VERSION_INTEGER; } } diff --git a/dbms/src/Common/ClickHouseRevision.h b/dbms/src/Common/ClickHouseRevision.h index 7c147fe6f8..1d097a5bf8 100644 --- a/dbms/src/Common/ClickHouseRevision.h +++ b/dbms/src/Common/ClickHouseRevision.h @@ -3,4 +3,5 @@ namespace ClickHouseRevision { unsigned get(); + unsigned getVersionInteger(); } diff --git a/dbms/src/Common/Config/CMakeLists.txt b/dbms/src/Common/Config/CMakeLists.txt index 614e70e757..a1bb2790fd 100644 --- a/dbms/src/Common/Config/CMakeLists.txt +++ b/dbms/src/Common/Config/CMakeLists.txt @@ -1,9 +1,8 @@ - include(${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake) add_headers_and_sources(clickhouse_common_config .) 
add_library(clickhouse_common_config ${LINK_MODE} ${clickhouse_common_config_headers} ${clickhouse_common_config_sources}) -target_link_libraries (clickhouse_common_config clickhouse_common_zookeeper string_utils ${Poco_XML_LIBRARY} ${Poco_Util_LIBRARY}) -target_include_directories (clickhouse_common_config PRIVATE ${DBMS_INCLUDE_DIR}) +target_link_libraries(clickhouse_common_config PUBLIC common PRIVATE clickhouse_common_zookeeper string_utils PUBLIC ${Poco_XML_LIBRARY} ${Poco_Util_LIBRARY}) +target_include_directories(clickhouse_common_config PUBLIC ${DBMS_INCLUDE_DIR}) diff --git a/dbms/src/Common/CounterInFile.h b/dbms/src/Common/CounterInFile.h index 6ea34362a5..2c831e3330 100644 --- a/dbms/src/Common/CounterInFile.h +++ b/dbms/src/Common/CounterInFile.h @@ -18,7 +18,15 @@ #include -#define SMALL_READ_WRITE_BUFFER_SIZE 16 +namespace DB +{ + namespace ErrorCodes + { + extern const int CANNOT_OPEN_FILE; + extern const int CANNOT_READ_ALL_DATA; + extern const int ATTEMPT_TO_READ_AFTER_EOF; + } +} /** Stores a number in the file. @@ -26,6 +34,9 @@ */ class CounterInFile { +private: + static inline constexpr size_t SMALL_READ_WRITE_BUFFER_SIZE = 16; + public: /// path - the name of the file, including the path CounterInFile(const std::string & path_) : path(path_) {} @@ -56,13 +67,13 @@ public: int fd = ::open(path.c_str(), O_RDWR | O_CREAT, 0666); if (-1 == fd) - DB::throwFromErrno("Cannot open file " + path); + DB::throwFromErrno("Cannot open file " + path, DB::ErrorCodes::CANNOT_OPEN_FILE); try { int flock_ret = flock(fd, LOCK_EX); if (-1 == flock_ret) - DB::throwFromErrno("Cannot lock file " + path); + DB::throwFromErrno("Cannot lock file " + path, DB::ErrorCodes::CANNOT_OPEN_FILE); if (!file_doesnt_exists) { @@ -130,7 +141,7 @@ public: int fd = ::open(path.c_str(), O_RDWR | O_CREAT, 0666); if (-1 == fd) - DB::throwFromErrno("Cannot open file " + path); + DB::throwFromErrno("Cannot open file " + path, DB::ErrorCodes::CANNOT_OPEN_FILE); try { @@ -178,6 +189,3 @@ private: std::string path; std::mutex mutex; }; - - -#undef SMALL_READ_WRITE_BUFFER_SIZE diff --git a/dbms/src/Common/CurrentMetrics.cpp b/dbms/src/Common/CurrentMetrics.cpp index 59b26f4b5e..2f8346d554 100644 --- a/dbms/src/Common/CurrentMetrics.cpp +++ b/dbms/src/Common/CurrentMetrics.cpp @@ -39,7 +39,8 @@ M(StorageBufferRows, "Number of rows in buffers of Buffer tables") \ M(StorageBufferBytes, "Number of bytes in buffers of Buffer tables") \ M(DictCacheRequests, "Number of requests in fly to data sources of dictionaries of cache type.") \ - M(Revision, "Revision of the server. It is a number incremented for every release or release candidate.") \ + M(Revision, "Revision of the server. It is a number incremented for every release or release candidate except patch releases.") \ + M(VersionInteger, "Version of the server in a single integer number in base-1000. 
For example, version 11.22.33 is translated to 11022033.") \ M(RWLockWaitingReaders, "Number of threads waiting for read on a table RWLock.") \ M(RWLockWaitingWriters, "Number of threads waiting for write on a table RWLock.") \ M(RWLockActiveReaders, "Number of threads holding read lock in a table RWLock.") \ diff --git a/dbms/src/Common/ErrorCodes.cpp b/dbms/src/Common/ErrorCodes.cpp index 4e724c995c..719e593e3b 100644 --- a/dbms/src/Common/ErrorCodes.cpp +++ b/dbms/src/Common/ErrorCodes.cpp @@ -396,6 +396,11 @@ namespace ErrorCodes extern const int MULTIPLE_ASSIGNMENTS_TO_COLUMN = 419; extern const int CANNOT_UPDATE_COLUMN = 420; extern const int CANNOT_ADD_DIFFERENT_AGGREGATE_STATES = 421; + extern const int UNSUPPORTED_URI_SCHEME = 422; + extern const int CANNOT_GETTIMEOFDAY = 423; + extern const int CANNOT_LINK = 424; + extern const int SYSTEM_ERROR = 425; + extern const int NULL_POINTER_DEREFERENCE = 426; extern const int KEEPER_EXCEPTION = 999; extern const int POCO_EXCEPTION = 1000; diff --git a/dbms/src/Common/Exception.cpp b/dbms/src/Common/Exception.cpp index 606f180e9d..a7bfbd6442 100644 --- a/dbms/src/Common/Exception.cpp +++ b/dbms/src/Common/Exception.cpp @@ -52,7 +52,7 @@ std::string errnoToString(int code, int e) void throwFromErrno(const std::string & s, int code, int e) { - throw ErrnoException(s + ", " + errnoToString(code, e)); + throw ErrnoException(s + ", " + errnoToString(code, e), code, e); } void tryLogCurrentException(const char * log_name, const std::string & start_of_message) diff --git a/dbms/src/Common/Exception.h b/dbms/src/Common/Exception.h index 4e3bdc8baf..d5e9d3112e 100644 --- a/dbms/src/Common/Exception.h +++ b/dbms/src/Common/Exception.h @@ -14,19 +14,25 @@ namespace Poco { class Logger; } namespace DB { +namespace ErrorCodes +{ + extern const int POCO_EXCEPTION; +} + class Exception : public Poco::Exception { public: Exception() {} /// For deferred initialization. - Exception(const std::string & msg, int code = 0) : Poco::Exception(msg, code) {} - Exception(const std::string & msg, const std::string & arg, int code = 0) : Poco::Exception(msg, arg, code) {} - Exception(const std::string & msg, const Exception & exc, int code = 0) : Poco::Exception(msg, exc, code), trace(exc.trace) {} - explicit Exception(const Poco::Exception & exc) : Poco::Exception(exc.displayText()) {} + Exception(const std::string & msg, int code) : Poco::Exception(msg, code) {} + Exception(const std::string & msg, const Exception & nested_exception, int code) + : Poco::Exception(msg, nested_exception, code), trace(nested_exception.trace) {} - const char * name() const throw() override { return "DB::Exception"; } - const char * className() const throw() override { return "DB::Exception"; } - DB::Exception * clone() const override { return new DB::Exception(*this); } + enum CreateFromPocoTag { CreateFromPoco }; + Exception(CreateFromPocoTag, const Poco::Exception & exc) : Poco::Exception(exc.displayText(), ErrorCodes::POCO_EXCEPTION) {} + + Exception * clone() const override { return new Exception(*this); } void rethrow() const override { throw *this; } + const char * name() const throw() override { return "DB::Exception"; } /// Add something to the existing message. 
void addMessage(const std::string & arg) { extendedMessage(arg); } @@ -35,6 +41,8 @@ public: private: StackTrace trace; + + const char * className() const throw() override { return "DB::Exception"; } }; @@ -42,25 +50,27 @@ private: class ErrnoException : public Exception { public: - ErrnoException(const std::string & msg, int code = 0, int saved_errno_ = 0) + ErrnoException(const std::string & msg, int code, int saved_errno_) : Exception(msg, code), saved_errno(saved_errno_) {} - ErrnoException(const std::string & msg, const std::string & arg, int code = 0, int saved_errno_ = 0) - : Exception(msg, arg, code), saved_errno(saved_errno_) {} - ErrnoException(const std::string & msg, const Exception & exc, int code = 0, int saved_errno_ = 0) - : Exception(msg, exc, code), saved_errno(saved_errno_) {} + + ErrnoException * clone() const override { return new ErrnoException(*this); } + void rethrow() const override { throw *this; } int getErrno() const { return saved_errno; } private: int saved_errno; + + const char * name() const throw() override { return "DB::ErrnoException"; } + const char * className() const throw() override { return "DB::ErrnoException"; } }; using Exceptions = std::vector; -std::string errnoToString(int code = 0, int the_errno = errno); -[[noreturn]] void throwFromErrno(const std::string & s, int code = 0, int the_errno = errno); +std::string errnoToString(int code, int the_errno = errno); +[[noreturn]] void throwFromErrno(const std::string & s, int code, int the_errno = errno); /** Try to write an exception to the log (and forget about it). diff --git a/dbms/src/Common/HashTable/SmallTable.h b/dbms/src/Common/HashTable/SmallTable.h index 73c2b35865..27dc8c0033 100644 --- a/dbms/src/Common/HashTable/SmallTable.h +++ b/dbms/src/Common/HashTable/SmallTable.h @@ -3,6 +3,15 @@ #include +namespace DB +{ + namespace ErrorCodes + { + extern const int INCORRECT_DATA; + } +} + + /** Replacement of the hash table for a small number (<10) of keys. * Implemented as an array with linear search. * The array is located inside the object. @@ -13,7 +22,6 @@ * you should check if the table is not full, * and do a `fallback` in this case (for example, use a real hash table). 
*/ - template < typename Key, @@ -86,7 +94,7 @@ public: DB::readVarUInt(size, in); if (size > capacity) - throw DB::Exception("Illegal size"); + throw DB::Exception("Illegal size", DB::ErrorCodes::INCORRECT_DATA); is_initialized = true; } @@ -306,7 +314,7 @@ public: DB::readVarUInt(new_size, rb); if (new_size > capacity) - throw DB::Exception("Illegal size"); + throw DB::Exception("Illegal size", DB::ErrorCodes::INCORRECT_DATA); for (size_t i = 0; i < new_size; ++i) buf[i].read(rb); @@ -324,7 +332,7 @@ public: DB::readText(new_size, rb); if (new_size > capacity) - throw DB::Exception("Illegal size"); + throw DB::Exception("Illegal size", DB::ErrorCodes::INCORRECT_DATA); for (size_t i = 0; i < new_size; ++i) { diff --git a/dbms/src/Common/NetException.h b/dbms/src/Common/NetException.h index b2b12e8564..ff7d7c5c5f 100644 --- a/dbms/src/Common/NetException.h +++ b/dbms/src/Common/NetException.h @@ -6,20 +6,17 @@ namespace DB { -class NetException : public DB::Exception +class NetException : public Exception { public: - NetException(const std::string & msg, int code = 0) : DB::Exception(msg, code) {} - NetException(const std::string & msg, const std::string & arg, int code = 0) : DB::Exception(msg, arg, code) {} - NetException(const std::string & msg, const DB::Exception & exc, int code = 0) : DB::Exception(msg, exc, code) {} + NetException(const std::string & msg, int code) : Exception(msg, code) {} - explicit NetException(const DB::Exception & exc) : DB::Exception(exc) {} - explicit NetException(const Poco::Exception & exc) : DB::Exception(exc.displayText()) {} + NetException * clone() const override { return new NetException(*this); } + void rethrow() const override { throw *this; } +private: const char * name() const throw() override { return "DB::NetException"; } const char * className() const throw() override { return "DB::NetException"; } - DB::NetException * clone() const override { return new DB::NetException(*this); } - void rethrow() const override { throw *this; } }; } diff --git a/dbms/src/Common/PoolWithFailoverBase.h b/dbms/src/Common/PoolWithFailoverBase.h index 63420f12ac..dc986a44ff 100644 --- a/dbms/src/Common/PoolWithFailoverBase.h +++ b/dbms/src/Common/PoolWithFailoverBase.h @@ -305,7 +305,7 @@ void PoolWithFailoverBase::reportError(const Entry & entry) return; } } - throw DB::Exception("Can't find pool to report error."); + throw DB::Exception("Can't find pool to report error", DB::ErrorCodes::LOGICAL_ERROR); } template diff --git a/dbms/src/Common/ProfileEvents.cpp b/dbms/src/Common/ProfileEvents.cpp index de50c625f1..7059e02d76 100644 --- a/dbms/src/Common/ProfileEvents.cpp +++ b/dbms/src/Common/ProfileEvents.cpp @@ -170,6 +170,7 @@ M(OSWriteBytes, "Number of bytes written to disks or block devices. Doesn't include bytes that are in page cache dirty pages. 
May not include data that was written by OS asynchronously.") \ M(OSReadChars, "Number of bytes read from filesystem, including page cache.") \ M(OSWriteChars, "Number of bytes written to filesystem, including page cache.") \ + M(CreatedHTTPConnections, "Total amount of created HTTP connections (closed or opened).") \ namespace ProfileEvents { diff --git a/dbms/src/Common/ShellCommand.cpp b/dbms/src/Common/ShellCommand.cpp index c9e625810c..675dd8391f 100644 --- a/dbms/src/Common/ShellCommand.cpp +++ b/dbms/src/Common/ShellCommand.cpp @@ -4,9 +4,11 @@ #include #include #include +#include #include #include #include +#include namespace DB @@ -75,13 +77,27 @@ namespace namespace DB { +ShellCommand::ShellCommand(pid_t pid, int in_fd, int out_fd, int err_fd, bool terminate_in_destructor_) + : pid(pid) + , terminate_in_destructor(terminate_in_destructor_) + , log(&Poco::Logger::get("ShellCommand")) + , in(in_fd) + , out(out_fd) + , err(err_fd) {} + ShellCommand::~ShellCommand() { - if (!wait_called) + if (terminate_in_destructor) + { + int retcode = kill(pid, SIGTERM); + if (retcode != 0) + LOG_WARNING(log, "Cannot kill pid " << pid << " errno '" << errnoToString(retcode) << "'"); + } + else if (!wait_called) tryWait(); } -std::unique_ptr ShellCommand::executeImpl(const char * filename, char * const argv[], bool pipe_stdin_only) +std::unique_ptr ShellCommand::executeImpl(const char * filename, char * const argv[], bool pipe_stdin_only, bool terminate_in_destructor) { /** Here it is written that with a normal call `vfork`, there is a chance of deadlock in multithreaded programs, * because of the resolving of characters in the shared library @@ -128,7 +144,7 @@ std::unique_ptr ShellCommand::executeImpl(const char * filename, c _exit(int(ReturnCodes::CANNOT_EXEC)); } - std::unique_ptr res(new ShellCommand(pid, pipe_stdin.write_fd, pipe_stdout.read_fd, pipe_stderr.read_fd)); + std::unique_ptr res(new ShellCommand(pid, pipe_stdin.write_fd, pipe_stdout.read_fd, pipe_stderr.read_fd, terminate_in_destructor)); /// Now the ownership of the file descriptors is passed to the result. pipe_stdin.write_fd = -1; @@ -139,7 +155,7 @@ std::unique_ptr ShellCommand::executeImpl(const char * filename, c } -std::unique_ptr ShellCommand::execute(const std::string & command, bool pipe_stdin_only) +std::unique_ptr ShellCommand::execute(const std::string & command, bool pipe_stdin_only, bool terminate_in_destructor) { /// Arguments in non-constant chunks of memory (as required for `execv`). /// Moreover, their copying must be done before calling `vfork`, so after `vfork` do a minimum of things. 
@@ -149,11 +165,11 @@ std::unique_ptr ShellCommand::execute(const std::string & command, char * const argv[] = { argv0.data(), argv1.data(), argv2.data(), nullptr }; - return executeImpl("/bin/sh", argv, pipe_stdin_only); + return executeImpl("/bin/sh", argv, pipe_stdin_only, terminate_in_destructor); } -std::unique_ptr ShellCommand::executeDirect(const std::string & path, const std::vector & arguments) +std::unique_ptr ShellCommand::executeDirect(const std::string & path, const std::vector & arguments, bool terminate_in_destructor) { size_t argv_sum_size = path.size() + 1; for (const auto & arg : arguments) @@ -174,7 +190,7 @@ std::unique_ptr ShellCommand::executeDirect(const std::string & pa argv[arguments.size() + 1] = nullptr; - return executeImpl(path.data(), argv.data(), false); + return executeImpl(path.data(), argv.data(), false, terminate_in_destructor); } diff --git a/dbms/src/Common/ShellCommand.h b/dbms/src/Common/ShellCommand.h index a439568fe0..3d1308272e 100644 --- a/dbms/src/Common/ShellCommand.h +++ b/dbms/src/Common/ShellCommand.h @@ -28,11 +28,13 @@ class ShellCommand private: pid_t pid; bool wait_called = false; + bool terminate_in_destructor; - ShellCommand(pid_t pid, int in_fd, int out_fd, int err_fd) - : pid(pid), in(in_fd), out(out_fd), err(err_fd) {} + Poco::Logger * log; - static std::unique_ptr executeImpl(const char * filename, char * const argv[], bool pipe_stdin_only); + ShellCommand(pid_t pid, int in_fd, int out_fd, int err_fd, bool terminate_in_destructor_); + + static std::unique_ptr executeImpl(const char * filename, char * const argv[], bool pipe_stdin_only, bool terminate_in_destructor); public: WriteBufferFromFile in; /// If the command reads from stdin, do not forget to call in.close() after writing all the data there. @@ -41,11 +43,13 @@ public: ~ShellCommand(); - /// Run the command using /bin/sh -c - static std::unique_ptr execute(const std::string & command, bool pipe_stdin_only = false); + /// Run the command using /bin/sh -c. + /// If terminate_in_destructor is true, send the terminate signal in the destructor and don't wait for the process. + static std::unique_ptr execute(const std::string & command, bool pipe_stdin_only = false, bool terminate_in_destructor = false); /// Run the executable with the specified arguments. `arguments` - without argv[0]. - static std::unique_ptr executeDirect(const std::string & path, const std::vector & arguments); + /// If terminate_in_destructor is true, send the terminate signal in the destructor and don't wait for the process. + static std::unique_ptr executeDirect(const std::string & path, const std::vector & arguments, bool terminate_in_destructor = false); /// Wait for the process to end, throw an exception if the code is not 0 or if the process was not completed by itself.
void wait(); diff --git a/dbms/src/Common/StatusFile.cpp b/dbms/src/Common/StatusFile.cpp index 84b1edc922..afe42262b5 100644 --- a/dbms/src/Common/StatusFile.cpp +++ b/dbms/src/Common/StatusFile.cpp @@ -20,6 +20,14 @@ namespace DB { +namespace ErrorCodes +{ + extern const int CANNOT_OPEN_FILE; + extern const int CANNOT_CLOSE_FILE; + extern const int CANNOT_TRUNCATE_FILE; + extern const int CANNOT_SEEK_THROUGH_FILE; +} + StatusFile::StatusFile(const std::string & path_) : path(path_) @@ -43,7 +51,7 @@ StatusFile::StatusFile(const std::string & path_) fd = ::open(path.c_str(), O_WRONLY | O_CREAT, 0666); if (-1 == fd) - throwFromErrno("Cannot open file " + path); + throwFromErrno("Cannot open file " + path, ErrorCodes::CANNOT_OPEN_FILE); try { @@ -51,16 +59,16 @@ StatusFile::StatusFile(const std::string & path_) if (-1 == flock_ret) { if (errno == EWOULDBLOCK) - throw Exception("Cannot lock file " + path + ". Another server instance in same directory is already running."); + throw Exception("Cannot lock file " + path + ". Another server instance in same directory is already running.", ErrorCodes::CANNOT_OPEN_FILE); else - throwFromErrno("Cannot lock file " + path); + throwFromErrno("Cannot lock file " + path, ErrorCodes::CANNOT_OPEN_FILE); } if (0 != ftruncate(fd, 0)) - throwFromErrno("Cannot ftruncate " + path); + throwFromErrno("Cannot ftruncate " + path, ErrorCodes::CANNOT_TRUNCATE_FILE); if (0 != lseek(fd, 0, SEEK_SET)) - throwFromErrno("Cannot lseek " + path); + throwFromErrno("Cannot lseek " + path, ErrorCodes::CANNOT_SEEK_THROUGH_FILE); /// Write information about current server instance to the file. { @@ -82,10 +90,10 @@ StatusFile::StatusFile(const std::string & path_) StatusFile::~StatusFile() { if (0 != close(fd)) - LOG_ERROR(&Logger::get("StatusFile"), "Cannot close file " << path << ", " << errnoToString()); + LOG_ERROR(&Logger::get("StatusFile"), "Cannot close file " << path << ", " << errnoToString(ErrorCodes::CANNOT_CLOSE_FILE)); if (0 != unlink(path.c_str())) - LOG_ERROR(&Logger::get("StatusFile"), "Cannot unlink file " << path << ", " << errnoToString()); + LOG_ERROR(&Logger::get("StatusFile"), "Cannot unlink file " << path << ", " << errnoToString(ErrorCodes::CANNOT_CLOSE_FILE)); } } diff --git a/dbms/src/Common/XDBCBridgeHelper.h b/dbms/src/Common/XDBCBridgeHelper.h index efdf7b401d..3ff91c902f 100644 --- a/dbms/src/Common/XDBCBridgeHelper.h +++ b/dbms/src/Common/XDBCBridgeHelper.h @@ -68,6 +68,7 @@ protected: public: using Configuration = Poco::Util::AbstractConfiguration; + Context & context; const Configuration & config; static constexpr inline auto DEFAULT_HOST = "localhost"; @@ -78,8 +79,8 @@ public: static constexpr inline auto IDENTIFIER_QUOTE_HANDLER = "/identifier_quote"; static constexpr inline auto PING_OK_ANSWER = "Ok."; - XDBCBridgeHelper(const Configuration & config_, const Poco::Timespan & http_timeout_, const std::string & connection_string_) - : http_timeout(http_timeout_), connection_string(connection_string_), config(config_) + XDBCBridgeHelper(Context & global_context_, const Poco::Timespan & http_timeout_, const std::string & connection_string_) + : http_timeout(http_timeout_), connection_string(connection_string_), context(global_context_), config(context.getConfigRef()) { size_t bridge_port = config.getUInt(BridgeHelperMixin::configPrefix() + ".port", DEFAULT_PORT); std::string bridge_host = config.getString(BridgeHelperMixin::configPrefix() + ".host", DEFAULT_HOST); @@ -210,7 +211,8 @@ private: /* Contains logic for instantiation of the bridge 
instance */ void startBridge() const { - BridgeHelperMixin::startBridge(config, log, http_timeout); + auto cmd = BridgeHelperMixin::startBridge(config, log, http_timeout); + context.addXDBCBridgeCommand(std::move(cmd)); } }; @@ -230,7 +232,7 @@ struct JDBCBridgeMixin return "JDBC"; } - static void startBridge(const Poco::Util::AbstractConfiguration &, const Poco::Logger *, const Poco::Timespan &) + static std::unique_ptr startBridge(const Poco::Util::AbstractConfiguration &, const Poco::Logger *, const Poco::Timespan &) { throw Exception("jdbc-bridge is not running. Please, start it manually", ErrorCodes::EXTERNAL_SERVER_IS_NOT_RESPONDING); } @@ -253,11 +255,13 @@ struct ODBCBridgeMixin return "ODBC"; } - static void startBridge(const Poco::Util::AbstractConfiguration & config, Poco::Logger * log, const Poco::Timespan & http_timeout) + static std::unique_ptr startBridge(const Poco::Util::AbstractConfiguration & config, Poco::Logger * log, const Poco::Timespan & http_timeout) { /// Path to executable folder Poco::Path path{config.getString("application.dir", "/usr/bin")}; + + std::vector cmd_args; path.setFileName( #if CLICKHOUSE_SPLIT_BINARY "clickhouse-odbc-bridge" @@ -268,34 +272,35 @@ struct ODBCBridgeMixin std::stringstream command; - command << path.toString() << -#if CLICKHOUSE_SPLIT_BINARY - " " -#else - " odbc-bridge " +#if !CLICKHOUSE_SPLIT_BINARY + cmd_args.push_back("odbc-bridge"); #endif - ; - command << "--http-port " << config.getUInt(configPrefix() + ".port", DEFAULT_PORT) << ' '; - command << "--listen-host " << config.getString(configPrefix() + ".listen_host", XDBCBridgeHelper::DEFAULT_HOST) - << ' '; - command << "--http-timeout " << http_timeout.totalMicroseconds() << ' '; + cmd_args.push_back("--http-port"); + cmd_args.push_back(std::to_string(config.getUInt(configPrefix() + ".port", DEFAULT_PORT))); + cmd_args.push_back("--listen-host"); + cmd_args.push_back(config.getString(configPrefix() + ".listen_host", XDBCBridgeHelper::DEFAULT_HOST)); + cmd_args.push_back("--http-timeout"); + cmd_args.push_back(std::to_string(http_timeout.totalMicroseconds())); if (config.has("logger." + configPrefix() + "_log")) - command << "--log-path " << config.getString("logger." + configPrefix() + "_log") << ' '; + { + cmd_args.push_back("--log-path"); + cmd_args.push_back(config.getString("logger." + configPrefix() + "_log")); + } if (config.has("logger." + configPrefix() + "_errlog")) - command << "--err-log-path " << config.getString("logger." + configPrefix() + "_errlog") << ' '; + { + cmd_args.push_back("--err-log-path"); + cmd_args.push_back(config.getString("logger." + configPrefix() + "_errlog")); + } if (config.has("logger." + configPrefix() + "_level")) - command << "--log-level " << config.getString("logger." + configPrefix() + "_level") << ' '; - command << "&"; /// we don't want to wait this process + { + cmd_args.push_back("--log-level"); + cmd_args.push_back(config.getString("logger." 
+ configPrefix() + "_level")); + } - auto command_str = command.str(); + LOG_TRACE(log, "Starting " + serviceAlias()); - std::cerr << command_str << std::endl; - - LOG_TRACE(log, "Starting " + serviceAlias() + " with command: " << command_str); - - auto cmd = ShellCommand::execute(command_str); - cmd->wait(); + return ShellCommand::executeDirect(path.toString(), cmd_args, true); } }; } diff --git a/dbms/src/Common/ZooKeeper/CMakeLists.txt b/dbms/src/Common/ZooKeeper/CMakeLists.txt index 2969d9a91e..1f69f0af1e 100644 --- a/dbms/src/Common/ZooKeeper/CMakeLists.txt +++ b/dbms/src/Common/ZooKeeper/CMakeLists.txt @@ -4,7 +4,8 @@ add_headers_and_sources(clickhouse_common_zookeeper .) add_library(clickhouse_common_zookeeper ${LINK_MODE} ${clickhouse_common_zookeeper_headers} ${clickhouse_common_zookeeper_sources}) -target_link_libraries (clickhouse_common_zookeeper clickhouse_common_io) +target_link_libraries (clickhouse_common_zookeeper PUBLIC clickhouse_common_io common PRIVATE string_utils PUBLIC ${Poco_Util_LIBRARY}) +target_include_directories(clickhouse_common_zookeeper PUBLIC ${DBMS_INCLUDE_DIR}) if (ENABLE_TESTS) add_subdirectory (tests) diff --git a/dbms/src/Common/ZooKeeper/ZooKeeperHolder.cpp b/dbms/src/Common/ZooKeeper/ZooKeeperHolder.cpp index 1b49c22cf4..9343bb4add 100644 --- a/dbms/src/Common/ZooKeeper/ZooKeeperHolder.cpp +++ b/dbms/src/Common/ZooKeeper/ZooKeeperHolder.cpp @@ -1,5 +1,15 @@ #include "ZooKeeperHolder.h" + +namespace DB +{ + namespace ErrorCodes + { + extern const int NULL_POINTER_DEREFERENCE; + } +} + + using namespace zkutil; ZooKeeperHolder::UnstorableZookeeperHandler ZooKeeperHolder::getZooKeeper() @@ -47,7 +57,7 @@ ZooKeeperHolder::UnstorableZookeeperHandler::UnstorableZookeeperHandler(ZooKeepe ZooKeeper * ZooKeeperHolder::UnstorableZookeeperHandler::operator->() { if (zk_ptr == nullptr) - throw DB::Exception(nullptr_exception_message); + throw DB::Exception(nullptr_exception_message, DB::ErrorCodes::NULL_POINTER_DEREFERENCE); return zk_ptr.get(); } @@ -55,20 +65,20 @@ ZooKeeper * ZooKeeperHolder::UnstorableZookeeperHandler::operator->() const ZooKeeper * ZooKeeperHolder::UnstorableZookeeperHandler::operator->() const { if (zk_ptr == nullptr) - throw DB::Exception(nullptr_exception_message); + throw DB::Exception(nullptr_exception_message, DB::ErrorCodes::NULL_POINTER_DEREFERENCE); return zk_ptr.get(); } ZooKeeper & ZooKeeperHolder::UnstorableZookeeperHandler::operator*() { if (zk_ptr == nullptr) - throw DB::Exception(nullptr_exception_message); + throw DB::Exception(nullptr_exception_message, DB::ErrorCodes::NULL_POINTER_DEREFERENCE); return *zk_ptr; } const ZooKeeper & ZooKeeperHolder::UnstorableZookeeperHandler::operator*() const { if (zk_ptr == nullptr) - throw DB::Exception(nullptr_exception_message); + throw DB::Exception(nullptr_exception_message, DB::ErrorCodes::NULL_POINTER_DEREFERENCE); return *zk_ptr; } diff --git a/dbms/src/Common/ZooKeeper/tests/CMakeLists.txt b/dbms/src/Common/ZooKeeper/tests/CMakeLists.txt index a24948240f..06716e4991 100644 --- a/dbms/src/Common/ZooKeeper/tests/CMakeLists.txt +++ b/dbms/src/Common/ZooKeeper/tests/CMakeLists.txt @@ -1,23 +1,23 @@ add_executable(zkutil_test_commands zkutil_test_commands.cpp) -target_link_libraries(zkutil_test_commands clickhouse_common_zookeeper) +target_link_libraries(zkutil_test_commands PRIVATE clickhouse_common_zookeeper) add_executable(zkutil_test_commands_new_lib zkutil_test_commands_new_lib.cpp) -target_link_libraries(zkutil_test_commands_new_lib clickhouse_common_zookeeper) 
+target_link_libraries(zkutil_test_commands_new_lib PRIVATE clickhouse_common_zookeeper) add_executable(zkutil_test_lock zkutil_test_lock.cpp) -target_link_libraries(zkutil_test_lock clickhouse_common_zookeeper) +target_link_libraries(zkutil_test_lock PRIVATE clickhouse_common_zookeeper) add_executable(zkutil_expiration_test zkutil_expiration_test.cpp) -target_link_libraries(zkutil_expiration_test clickhouse_common_zookeeper) +target_link_libraries(zkutil_expiration_test PRIVATE clickhouse_common_zookeeper) add_executable(zkutil_test_async zkutil_test_async.cpp) -target_link_libraries(zkutil_test_async clickhouse_common_zookeeper) +target_link_libraries(zkutil_test_async PRIVATE clickhouse_common_zookeeper) add_executable(zkutil_zookeeper_holder zkutil_zookeeper_holder.cpp) -target_link_libraries(zkutil_zookeeper_holder clickhouse_common_zookeeper) +target_link_libraries(zkutil_zookeeper_holder PRIVATE clickhouse_common_zookeeper) add_executable (zk_many_watches_reconnect zk_many_watches_reconnect.cpp) -target_link_libraries (zk_many_watches_reconnect clickhouse_common_zookeeper clickhouse_common_config) +target_link_libraries (zk_many_watches_reconnect PRIVATE clickhouse_common_zookeeper clickhouse_common_config) add_executable (zookeeper_impl zookeeper_impl.cpp) -target_link_libraries (zookeeper_impl clickhouse_common_zookeeper) +target_link_libraries (zookeeper_impl PRIVATE clickhouse_common_zookeeper) diff --git a/dbms/src/Common/config.h.in b/dbms/src/Common/config.h.in index 27c5e96524..a06970384e 100644 --- a/dbms/src/Common/config.h.in +++ b/dbms/src/Common/config.h.in @@ -16,3 +16,4 @@ #cmakedefine01 USE_POCO_NETSSL #cmakedefine01 CLICKHOUSE_SPLIT_BINARY #cmakedefine01 ENABLE_INSERT_INFILE +#cmakedefine01 USE_BASE64 diff --git a/dbms/src/Common/config_version.h.in b/dbms/src/Common/config_version.h.in index 8f3e2f56c8..a90fd77b6a 100644 --- a/dbms/src/Common/config_version.h.in +++ b/dbms/src/Common/config_version.h.in @@ -23,6 +23,7 @@ #cmakedefine VERSION_FULL "@VERSION_FULL@" #cmakedefine VERSION_DESCRIBE "@VERSION_DESCRIBE@" #cmakedefine VERSION_GITHASH "@VERSION_GITHASH@" +#cmakedefine VERSION_INTEGER @VERSION_INTEGER@ #if defined(VERSION_MAJOR) #define DBMS_VERSION_MAJOR VERSION_MAJOR diff --git a/dbms/src/Common/createHardLink.cpp b/dbms/src/Common/createHardLink.cpp index 06647cd437..824b7e9908 100644 --- a/dbms/src/Common/createHardLink.cpp +++ b/dbms/src/Common/createHardLink.cpp @@ -8,6 +8,12 @@ namespace DB { +namespace ErrorCodes +{ + extern const int CANNOT_STAT; + extern const int CANNOT_LINK; +} + void createHardLink(const String & source_path, const String & destination_path) { if (0 != link(source_path.c_str(), destination_path.c_str())) @@ -20,16 +26,16 @@ void createHardLink(const String & source_path, const String & destination_path) struct stat destination_descr; if (0 != lstat(source_path.c_str(), &source_descr)) - throwFromErrno("Cannot stat " + source_path); + throwFromErrno("Cannot stat " + source_path, ErrorCodes::CANNOT_STAT); if (0 != lstat(destination_path.c_str(), &destination_descr)) - throwFromErrno("Cannot stat " + destination_path); + throwFromErrno("Cannot stat " + destination_path, ErrorCodes::CANNOT_STAT); if (source_descr.st_ino != destination_descr.st_ino) - throwFromErrno("Destination file " + destination_path + " is already exist and have different inode.", 0, link_errno); + throwFromErrno("Destination file " + destination_path + " is already exist and have different inode.", ErrorCodes::CANNOT_LINK, link_errno); } else - 
throwFromErrno("Cannot link " + source_path + " to " + destination_path); + throwFromErrno("Cannot link " + source_path + " to " + destination_path, ErrorCodes::CANNOT_LINK); } } diff --git a/dbms/src/Common/setThreadName.cpp b/dbms/src/Common/setThreadName.cpp index fc228d78ff..d035822974 100644 --- a/dbms/src/Common/setThreadName.cpp +++ b/dbms/src/Common/setThreadName.cpp @@ -32,7 +32,7 @@ void setThreadName(const char * name) #else if (0 != prctl(PR_SET_NAME, name, 0, 0, 0)) #endif - DB::throwFromErrno("Cannot set thread name with prctl(PR_SET_NAME, ...)"); + DB::throwFromErrno("Cannot set thread name with prctl(PR_SET_NAME, ...)", DB::ErrorCodes::PTHREAD_ERROR); } std::string getThreadName() @@ -48,7 +48,7 @@ std::string getThreadName() // throw DB::Exception("Cannot get thread name with pthread_get_name_np()", DB::ErrorCodes::PTHREAD_ERROR); #else if (0 != prctl(PR_GET_NAME, name.data(), 0, 0, 0)) - DB::throwFromErrno("Cannot get thread name with prctl(PR_GET_NAME)"); + DB::throwFromErrno("Cannot get thread name with prctl(PR_GET_NAME)", DB::ErrorCodes::PTHREAD_ERROR); #endif name.resize(std::strlen(name.data())); diff --git a/dbms/src/Common/tests/CMakeLists.txt b/dbms/src/Common/tests/CMakeLists.txt index f4d01e85bd..802963bf40 100644 --- a/dbms/src/Common/tests/CMakeLists.txt +++ b/dbms/src/Common/tests/CMakeLists.txt @@ -1,73 +1,73 @@ add_executable (hashes_test hashes_test.cpp) -target_link_libraries (hashes_test dbms) +target_link_libraries (hashes_test PRIVATE dbms) add_executable (sip_hash sip_hash.cpp) -target_link_libraries (sip_hash clickhouse_common_io) +target_link_libraries (sip_hash PRIVATE clickhouse_common_io) add_executable (sip_hash_perf sip_hash_perf.cpp) -target_link_libraries (sip_hash_perf clickhouse_common_io) +target_link_libraries (sip_hash_perf PRIVATE clickhouse_common_io) add_executable (auto_array auto_array.cpp) -target_link_libraries (auto_array clickhouse_common_io) +target_link_libraries (auto_array PRIVATE clickhouse_common_io) add_executable (lru_cache lru_cache.cpp) -target_link_libraries (lru_cache clickhouse_common_io) +target_link_libraries (lru_cache PRIVATE clickhouse_common_io) add_executable (hash_table hash_table.cpp) -target_link_libraries (hash_table clickhouse_common_io) +target_link_libraries (hash_table PRIVATE clickhouse_common_io) add_executable (small_table small_table.cpp) -target_link_libraries (small_table clickhouse_common_io) +target_link_libraries (small_table PRIVATE clickhouse_common_io) add_executable (parallel_aggregation parallel_aggregation.cpp) -target_link_libraries (parallel_aggregation clickhouse_common_io) +target_link_libraries (parallel_aggregation PRIVATE clickhouse_common_io) add_executable (parallel_aggregation2 parallel_aggregation2.cpp) -target_link_libraries (parallel_aggregation2 clickhouse_common_io) +target_link_libraries (parallel_aggregation2 PRIVATE clickhouse_common_io) add_executable (int_hashes_perf int_hashes_perf.cpp AvalancheTest.cpp Random.cpp) -target_link_libraries (int_hashes_perf clickhouse_common_io) +target_link_libraries (int_hashes_perf PRIVATE clickhouse_common_io) add_executable (simple_cache simple_cache.cpp) target_include_directories (simple_cache PRIVATE ${DBMS_INCLUDE_DIR}) -target_link_libraries (simple_cache common) +target_link_libraries (simple_cache PRIVATE common) add_executable (compact_array compact_array.cpp) -target_link_libraries (compact_array clickhouse_common_io ${Boost_FILESYSTEM_LIBRARY}) +target_link_libraries (compact_array PRIVATE clickhouse_common_io 
${Boost_FILESYSTEM_LIBRARY}) add_executable (radix_sort radix_sort.cpp) -target_link_libraries (radix_sort clickhouse_common_io) +target_link_libraries (radix_sort PRIVATE clickhouse_common_io) add_executable (shell_command_test shell_command_test.cpp) -target_link_libraries (shell_command_test clickhouse_common_io) +target_link_libraries (shell_command_test PRIVATE clickhouse_common_io) add_executable (arena_with_free_lists arena_with_free_lists.cpp) -target_link_libraries (arena_with_free_lists clickhouse_common_io) +target_link_libraries (arena_with_free_lists PRIVATE clickhouse_common_io) add_executable (pod_array pod_array.cpp) -target_link_libraries (pod_array clickhouse_common_io) +target_link_libraries (pod_array PRIVATE clickhouse_common_io) add_executable (thread_creation_latency thread_creation_latency.cpp) -target_link_libraries (thread_creation_latency clickhouse_common_io) +target_link_libraries (thread_creation_latency PRIVATE clickhouse_common_io) add_executable (thread_pool thread_pool.cpp) -target_link_libraries (thread_pool clickhouse_common_io) +target_link_libraries (thread_pool PRIVATE clickhouse_common_io) add_executable (array_cache array_cache.cpp) -target_link_libraries (array_cache clickhouse_common_io) +target_link_libraries (array_cache PRIVATE clickhouse_common_io) add_executable (space_saving space_saving.cpp) -target_link_libraries (space_saving clickhouse_common_io) +target_link_libraries (space_saving PRIVATE clickhouse_common_io) add_executable (integer_hash_tables_and_hashes integer_hash_tables_and_hashes.cpp) target_include_directories (integer_hash_tables_and_hashes SYSTEM BEFORE PRIVATE ${SPARCEHASH_INCLUDE_DIR}) -target_link_libraries (integer_hash_tables_and_hashes clickhouse_common_io) +target_link_libraries (integer_hash_tables_and_hashes PRIVATE clickhouse_common_io) add_executable (allocator allocator.cpp) -target_link_libraries (allocator clickhouse_common_io) +target_link_libraries (allocator PRIVATE clickhouse_common_io) add_executable (cow_columns cow_columns.cpp) -target_link_libraries (cow_columns clickhouse_common_io) +target_link_libraries (cow_columns PRIVATE clickhouse_common_io) add_executable (stopwatch stopwatch.cpp) -target_link_libraries (stopwatch clickhouse_common_io) +target_link_libraries (stopwatch PRIVATE clickhouse_common_io) diff --git a/dbms/src/Common/tests/arena_with_free_lists.cpp b/dbms/src/Common/tests/arena_with_free_lists.cpp index 0a16eff6c1..5091551b55 100644 --- a/dbms/src/Common/tests/arena_with_free_lists.cpp +++ b/dbms/src/Common/tests/arena_with_free_lists.cpp @@ -25,6 +25,14 @@ using namespace DB; +namespace DB +{ + namespace ErrorCodes + { + extern const int SYSTEM_ERROR; + } +} + /// Implementation of ArenaWithFreeLists, which contains a bug. Used to reproduce the bug. 
#if USE_BAD_ARENA @@ -237,7 +245,7 @@ int main(int argc, char ** argv) rusage resource_usage; if (0 != getrusage(RUSAGE_SELF, &resource_usage)) - throwFromErrno("Cannot getrusage"); + throwFromErrno("Cannot getrusage", ErrorCodes::SYSTEM_ERROR); size_t allocated_bytes = resource_usage.ru_maxrss * 1024; std::cerr << "Current memory usage: " << allocated_bytes << " bytes.\n"; diff --git a/dbms/src/Common/tests/thread_creation_latency.cpp b/dbms/src/Common/tests/thread_creation_latency.cpp index ccc499c9a1..ef910a3e9f 100644 --- a/dbms/src/Common/tests/thread_creation_latency.cpp +++ b/dbms/src/Common/tests/thread_creation_latency.cpp @@ -22,6 +22,15 @@ void f() { ++x; } void * g(void *) { f(); return {}; } +namespace DB +{ + namespace ErrorCodes + { + extern const int PTHREAD_ERROR; + } +} + + template void test(size_t n, const char * name, F && kernel) { @@ -80,9 +89,9 @@ int main(int argc, char ** argv) { pthread_t thread; if (pthread_create(&thread, nullptr, g, nullptr)) - DB::throwFromErrno("Cannot create thread."); + DB::throwFromErrno("Cannot create thread.", DB::ErrorCodes::PTHREAD_ERROR); if (pthread_join(thread, nullptr)) - DB::throwFromErrno("Cannot join thread."); + DB::throwFromErrno("Cannot join thread.", DB::ErrorCodes::PTHREAD_ERROR); }); test(n, "Create and destroy std::thread each iteration", [] diff --git a/dbms/src/Core/BackgroundSchedulePool.cpp b/dbms/src/Core/BackgroundSchedulePool.cpp index 3e24cfca92..5da499e5ae 100644 --- a/dbms/src/Core/BackgroundSchedulePool.cpp +++ b/dbms/src/Core/BackgroundSchedulePool.cpp @@ -278,7 +278,7 @@ void BackgroundSchedulePool::delayExecutionThreadFunction() { std::unique_lock lock(delayed_tasks_mutex); - while(!shutdown) + while (!shutdown) { Poco::Timestamp min_time; diff --git a/dbms/src/Core/Defines.h b/dbms/src/Core/Defines.h index cf7a0b621e..570c108b2a 100644 --- a/dbms/src/Core/Defines.h +++ b/dbms/src/Core/Defines.h @@ -62,6 +62,9 @@ #define DEFAULT_HTTP_READ_BUFFER_TIMEOUT 1800 #define DEFAULT_HTTP_READ_BUFFER_CONNECTION_TIMEOUT 1 +/// Maximum number of http-connections between two endpoints +/// the number is arbitrary +#define DEFAULT_COUNT_OF_HTTP_CONNECTIONS_PER_ENDPOINT 15 // more aliases: https://mailman.videolan.org/pipermail/x264-devel/2014-May/010660.html diff --git a/dbms/src/Core/ExternalTable.cpp b/dbms/src/Core/ExternalTable.cpp index 9ab3a8e8d3..5bfdbb12e9 100644 --- a/dbms/src/Core/ExternalTable.cpp +++ b/dbms/src/Core/ExternalTable.cpp @@ -168,7 +168,7 @@ void ExternalTablesHandler::handlePart(const Poco::Net::MessageHeader & header, /// Write data data.first->readPrefix(); output->writePrefix(); - while(Block block = data.first->read()) + while (Block block = data.first->read()) output->write(block); data.first->readSuffix(); output->writeSuffix(); diff --git a/dbms/src/Core/Field.h b/dbms/src/Core/Field.h index 6e07e1387f..b08c75df88 100644 --- a/dbms/src/Core/Field.h +++ b/dbms/src/Core/Field.h @@ -578,43 +578,54 @@ template <> struct TypeName { static std::string get() { return "Array"; template <> struct TypeName { static std::string get() { return "Tuple"; } }; -template struct NearestFieldType; +template struct NearestFieldTypeImpl; -template <> struct NearestFieldType { using Type = UInt64; }; -template <> struct NearestFieldType { using Type = UInt64; }; -template <> struct NearestFieldType { using Type = UInt64; }; -template <> struct NearestFieldType { using Type = UInt64; }; -#ifdef __APPLE__ -template <> struct NearestFieldType { using Type = UInt64; }; -template <> struct NearestFieldType {
using Type = UInt64; }; -#endif -template <> struct NearestFieldType { using Type = UInt64; }; -template <> struct NearestFieldType { using Type = UInt128; }; -template <> struct NearestFieldType { using Type = UInt128; }; -template <> struct NearestFieldType { using Type = Int64; }; -template <> struct NearestFieldType { using Type = Int64; }; -template <> struct NearestFieldType { using Type = Int64; }; -template <> struct NearestFieldType { using Type = Int64; }; -template <> struct NearestFieldType { using Type = Int128; }; -template <> struct NearestFieldType { using Type = DecimalField; }; -template <> struct NearestFieldType { using Type = DecimalField; }; -template <> struct NearestFieldType { using Type = DecimalField; }; -template <> struct NearestFieldType> { using Type = DecimalField; }; -template <> struct NearestFieldType> { using Type = DecimalField; }; -template <> struct NearestFieldType> { using Type = DecimalField; }; -template <> struct NearestFieldType { using Type = Float64; }; -template <> struct NearestFieldType { using Type = Float64; }; -template <> struct NearestFieldType { using Type = String; }; -template <> struct NearestFieldType { using Type = String; }; -template <> struct NearestFieldType { using Type = Array; }; -template <> struct NearestFieldType { using Type = Tuple; }; -template <> struct NearestFieldType { using Type = UInt64; }; -template <> struct NearestFieldType { using Type = Null; }; +/// char may be signed or unsigned, and behave identically to signed char or unsigned char, +/// but they are always three different types. +/// signedness of char is different in Linux on x86 and Linux on ARM. +template <> struct NearestFieldTypeImpl { using Type = std::conditional_t, Int64, UInt64>; }; +template <> struct NearestFieldTypeImpl { using Type = Int64; }; +template <> struct NearestFieldTypeImpl { using Type = UInt64; }; + +template <> struct NearestFieldTypeImpl { using Type = UInt64; }; +template <> struct NearestFieldTypeImpl { using Type = UInt64; }; + +template <> struct NearestFieldTypeImpl { using Type = UInt64; }; +template <> struct NearestFieldTypeImpl { using Type = UInt128; }; +template <> struct NearestFieldTypeImpl { using Type = UInt128; }; +template <> struct NearestFieldTypeImpl { using Type = Int64; }; +template <> struct NearestFieldTypeImpl { using Type = Int64; }; + +/// long and long long are always different types that may behave identically or not. +/// This is different on Linux and Mac. 
+template <> struct NearestFieldTypeImpl { using Type = Int64; }; +template <> struct NearestFieldTypeImpl { using Type = Int64; }; +template <> struct NearestFieldTypeImpl { using Type = UInt64; }; +template <> struct NearestFieldTypeImpl { using Type = UInt64; }; + +template <> struct NearestFieldTypeImpl { using Type = Int128; }; +template <> struct NearestFieldTypeImpl { using Type = DecimalField; }; +template <> struct NearestFieldTypeImpl { using Type = DecimalField; }; +template <> struct NearestFieldTypeImpl { using Type = DecimalField; }; +template <> struct NearestFieldTypeImpl> { using Type = DecimalField; }; +template <> struct NearestFieldTypeImpl> { using Type = DecimalField; }; +template <> struct NearestFieldTypeImpl> { using Type = DecimalField; }; +template <> struct NearestFieldTypeImpl { using Type = Float64; }; +template <> struct NearestFieldTypeImpl { using Type = Float64; }; +template <> struct NearestFieldTypeImpl { using Type = String; }; +template <> struct NearestFieldTypeImpl { using Type = String; }; +template <> struct NearestFieldTypeImpl { using Type = Array; }; +template <> struct NearestFieldTypeImpl { using Type = Tuple; }; +template <> struct NearestFieldTypeImpl { using Type = UInt64; }; +template <> struct NearestFieldTypeImpl { using Type = Null; }; + +template +using NearestFieldType = typename NearestFieldTypeImpl::Type; template decltype(auto) nearestFieldType(T && x) { - using U = typename NearestFieldType>::Type; + using U = NearestFieldType>; if constexpr (std::is_same_v, U>) return std::forward(x); else diff --git a/dbms/src/Core/Names.h b/dbms/src/Core/Names.h index ff8252084a..61220779f7 100644 --- a/dbms/src/Core/Names.h +++ b/dbms/src/Core/Names.h @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -11,6 +12,7 @@ namespace DB using Names = std::vector; using NameSet = std::unordered_set; +using NameOrderedSet = std::set; using NameToNameMap = std::unordered_map; using NameToNameSetMap = std::unordered_map; diff --git a/dbms/src/Core/tests/CMakeLists.txt b/dbms/src/Core/tests/CMakeLists.txt index 26e89341bb..b5b08bb161 100644 --- a/dbms/src/Core/tests/CMakeLists.txt +++ b/dbms/src/Core/tests/CMakeLists.txt @@ -1,18 +1,15 @@ -add_executable (exception exception.cpp) -target_link_libraries (exception clickhouse_common_io) - add_executable (string_pool string_pool.cpp) -target_link_libraries (string_pool clickhouse_common_io) +target_link_libraries (string_pool PRIVATE clickhouse_common_io) target_include_directories (string_pool SYSTEM BEFORE PRIVATE ${SPARCEHASH_INCLUDE_DIR}) add_executable (field field.cpp) -target_link_libraries (field dbms) +target_link_libraries (field PRIVATE dbms) add_executable (move_field move_field.cpp) -target_link_libraries (move_field clickhouse_common_io) +target_link_libraries (move_field PRIVATE clickhouse_common_io) add_executable (rvo_test rvo_test.cpp) -target_link_libraries (rvo_test Threads::Threads) +target_link_libraries (rvo_test PRIVATE Threads::Threads) add_executable (string_ref_hash string_ref_hash.cpp) -target_link_libraries (string_ref_hash clickhouse_common_io) +target_link_libraries (string_ref_hash PRIVATE clickhouse_common_io) diff --git a/dbms/src/Core/tests/exception.cpp b/dbms/src/Core/tests/exception.cpp deleted file mode 100644 index d0e66f75ff..0000000000 --- a/dbms/src/Core/tests/exception.cpp +++ /dev/null @@ -1,25 +0,0 @@ -#include - -#include - -#include - - -int main(int, char **) -{ - try - { - //throw Poco::Net::ConnectionRefusedException(); - throw 
DB::Exception(Poco::Net::ConnectionRefusedException()); - } - catch (const DB::Exception & e) - { - std::cerr << e.displayText() << std::endl; - } - catch (const Poco::Exception & e) - { - std::cerr << e.displayText() << std::endl; - } - - return 0; -} diff --git a/dbms/src/DataStreams/DistinctSortedBlockInputStream.cpp b/dbms/src/DataStreams/DistinctSortedBlockInputStream.cpp index a14d32794f..651b9738f7 100644 --- a/dbms/src/DataStreams/DistinctSortedBlockInputStream.cpp +++ b/dbms/src/DataStreams/DistinctSortedBlockInputStream.cpp @@ -150,7 +150,7 @@ ColumnRawPtrs DistinctSortedBlockInputStream::getClearingColumns(const Block & b { ColumnRawPtrs clearing_hint_columns; clearing_hint_columns.reserve(description.size()); - for(const auto & sort_column_description : description) + for (const auto & sort_column_description : description) { const auto sort_column_ptr = block.safeGetByPosition(sort_column_description.column_number).column.get(); const auto it = std::find(key_columns.cbegin(), key_columns.cend(), sort_column_ptr); diff --git a/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp b/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp index 608bc06b71..4cdc265593 100644 --- a/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp +++ b/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp @@ -179,8 +179,9 @@ void MergingAggregatedMemoryEfficientBlockInputStream::start() auto thread_group = CurrentThread::getGroup(); reading_pool->schedule([&child, thread_group] { - CurrentThread::attachToIfDetached(thread_group); setThreadName("MergeAggReadThr"); + if (thread_group) + CurrentThread::attachToIfDetached(thread_group); CurrentMetrics::Increment metric_increment{CurrentMetrics::QueryThread}; child->readPrefix(); }); @@ -485,8 +486,9 @@ MergingAggregatedMemoryEfficientBlockInputStream::BlocksToMerge MergingAggregate auto thread_group = CurrentThread::getGroup(); reading_pool->schedule([&input, &read_from_input, thread_group] { - CurrentThread::attachToIfDetached(thread_group); setThreadName("MergeAggReadThr"); + if (thread_group) + CurrentThread::attachToIfDetached(thread_group); CurrentMetrics::Increment metric_increment{CurrentMetrics::QueryThread}; read_from_input(input); }); diff --git a/dbms/src/DataStreams/ParallelInputsProcessor.h b/dbms/src/DataStreams/ParallelInputsProcessor.h index a1c4e2ac48..509522de29 100644 --- a/dbms/src/DataStreams/ParallelInputsProcessor.h +++ b/dbms/src/DataStreams/ParallelInputsProcessor.h @@ -107,8 +107,26 @@ public: active_threads = max_threads; threads.reserve(max_threads); auto thread_group = CurrentThread::getGroup(); - for (size_t i = 0; i < max_threads; ++i) - threads.emplace_back([=] () { thread(thread_group, i); } ); + + try + { + for (size_t i = 0; i < max_threads; ++i) + threads.emplace_back([=] () { thread(thread_group, i); } ); + } + catch (...) + { + cancel(false); + wait(); + if (active_threads) + { + active_threads = 0; + /// handler.onFinish() is supposed to be called from one of the threads when the number of + /// finished threads reaches max_threads. But since we weren't able to launch all threads, + /// we have to call onFinish() manually here. + handler.onFinish(); + } + throw; + } } /// Ask all sources to stop earlier than they run out. 
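The ParallelInputsProcessor hunk above makes thread startup exception-safe: if creating one of the worker threads throws, the threads that were already launched are cancelled and joined, and handler.onFinish() is called manually, because the worker that would normally report completion may never have started. A minimal sketch of that pattern follows; the names startThreads, thread_body and on_finish are hypothetical stand-ins, not the actual ParallelInputsProcessor members.

#include <functional>
#include <thread>
#include <vector>

/// Sketch only: illustrates the failure path, not the real ParallelInputsProcessor code.
void startThreads(
    size_t max_threads,
    std::vector<std::thread> & threads,
    const std::function<void(size_t)> & thread_body,
    const std::function<void()> & on_finish)
{
    threads.reserve(max_threads);
    try
    {
        for (size_t i = 0; i < max_threads; ++i)
            threads.emplace_back(thread_body, i);
    }
    catch (...)
    {
        /// Thread creation failed part-way: join whatever did start
        /// (the real code calls cancel(false) and wait()) ...
        for (auto & t : threads)
            if (t.joinable())
                t.join();
        /// ... then report completion manually, because the worker that would
        /// normally call on_finish() after the last thread finishes was never
        /// launched, and rethrow the original exception.
        on_finish();
        throw;
    }
}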
diff --git a/dbms/src/DataStreams/PushingToViewsBlockOutputStream.cpp b/dbms/src/DataStreams/PushingToViewsBlockOutputStream.cpp index 096a7b1fe2..f54c62b357 100644 --- a/dbms/src/DataStreams/PushingToViewsBlockOutputStream.cpp +++ b/dbms/src/DataStreams/PushingToViewsBlockOutputStream.cpp @@ -91,7 +91,8 @@ void PushingToViewsBlockOutputStream::write(const Block & block) pool.schedule([=] { setThreadName("PushingToViewsBlockOutputStream"); - CurrentThread::attachToIfDetached(thread_group); + if (thread_group) + CurrentThread::attachToIfDetached(thread_group); process(block, view_num); }); } diff --git a/dbms/src/DataStreams/tests/CMakeLists.txt b/dbms/src/DataStreams/tests/CMakeLists.txt index 3dbbc0edbe..3f6e154927 100644 --- a/dbms/src/DataStreams/tests/CMakeLists.txt +++ b/dbms/src/DataStreams/tests/CMakeLists.txt @@ -1,16 +1,16 @@ -set(SRCS ) +set(SRCS) add_executable (expression_stream expression_stream.cpp ${SRCS}) -target_link_libraries (expression_stream dbms clickhouse_storages_system) +target_link_libraries (expression_stream PRIVATE dbms clickhouse_storages_system clickhouse_parsers) add_executable (filter_stream filter_stream.cpp ${SRCS}) -target_link_libraries (filter_stream dbms clickhouse_storages_system) +target_link_libraries (filter_stream PRIVATE dbms clickhouse_storages_system clickhouse_parsers clickhouse_common_io) add_executable (union_stream2 union_stream2.cpp ${SRCS}) -target_link_libraries (union_stream2 dbms) +target_link_libraries (union_stream2 PRIVATE dbms) add_executable (collapsing_sorted_stream collapsing_sorted_stream.cpp ${SRCS}) -target_link_libraries (collapsing_sorted_stream dbms) +target_link_libraries (collapsing_sorted_stream PRIVATE dbms) add_executable (finish_sorting_stream finish_sorting_stream.cpp ${SRCS}) -target_link_libraries (finish_sorting_stream dbms) +target_link_libraries (finish_sorting_stream PRIVATE dbms) diff --git a/dbms/src/DataStreams/tests/finish_sorting_stream.cpp b/dbms/src/DataStreams/tests/finish_sorting_stream.cpp index 7627d1712b..861965e1e1 100644 --- a/dbms/src/DataStreams/tests/finish_sorting_stream.cpp +++ b/dbms/src/DataStreams/tests/finish_sorting_stream.cpp @@ -13,6 +13,15 @@ using namespace DB; +namespace DB +{ + namespace ErrorCodes + { + extern const int LOGICAL_ERROR; + } +} + + int main(int argc, char ** argv) { srand(123456); @@ -75,7 +84,7 @@ int main(int argc, char ** argv) } if (res_block.rows() != n * m) - throw Exception("Result block size mismatch"); + throw Exception("Result block size mismatch", ErrorCodes::LOGICAL_ERROR); const auto & columns = res_block.getColumns(); @@ -86,7 +95,7 @@ int main(int argc, char ** argv) if (res < 0) break; else if (res > 0) - throw Exception("Result stream not sorted"); + throw Exception("Result stream not sorted", ErrorCodes::LOGICAL_ERROR); } stopwatch.stop(); diff --git a/dbms/src/DataTypes/DataTypeArray.cpp b/dbms/src/DataTypes/DataTypeArray.cpp index de0b0d9127..1b739a040a 100644 --- a/dbms/src/DataTypes/DataTypeArray.cpp +++ b/dbms/src/DataTypes/DataTypeArray.cpp @@ -259,6 +259,10 @@ void DataTypeArray::deserializeBinaryBulkWithMultipleStreams( if (last_offset < nested_column.size()) throw Exception("Nested column is longer than last offset", ErrorCodes::LOGICAL_ERROR); size_t nested_limit = last_offset - nested_column.size(); + + /// Adjust value size hint. Divide it to the average array size. + settings.avg_value_size_hint = nested_limit ? 
settings.avg_value_size_hint / nested_limit * offset_values.size() : 0; + nested->deserializeBinaryBulkWithMultipleStreams(nested_column, nested_limit, settings, state); settings.path.pop_back(); diff --git a/dbms/src/DataTypes/DataTypeEnum.cpp b/dbms/src/DataTypes/DataTypeEnum.cpp index aacb403fb9..3cd6af4624 100644 --- a/dbms/src/DataTypes/DataTypeEnum.cpp +++ b/dbms/src/DataTypes/DataTypeEnum.cpp @@ -103,7 +103,7 @@ DataTypeEnum::DataTypeEnum(const Values & values_) : values{values_} template void DataTypeEnum::serializeBinary(const Field & field, WriteBuffer & ostr) const { - const FieldType x = get::Type>(field); + const FieldType x = get>(field); writeBinary(x, ostr); } @@ -347,7 +347,7 @@ static DataTypePtr create(const ASTPtr & arguments) ErrorCodes::UNEXPECTED_AST_STRUCTURE); const String & field_name = name_literal->value.get(); - const auto value = value_literal->value.get::Type>(); + const auto value = value_literal->value.get>(); if (value > std::numeric_limits::max() || value < std::numeric_limits::min()) throw Exception{"Value " + toString(value) + " for element '" + field_name + "' exceeds range of " + EnumName::value, diff --git a/dbms/src/DataTypes/DataTypeNumberBase.cpp b/dbms/src/DataTypes/DataTypeNumberBase.cpp index 5f598a0992..12904b34c1 100644 --- a/dbms/src/DataTypes/DataTypeNumberBase.cpp +++ b/dbms/src/DataTypes/DataTypeNumberBase.cpp @@ -144,14 +144,14 @@ void DataTypeNumberBase::deserializeTextCSV(IColumn & column, ReadBuffer & is template Field DataTypeNumberBase::getDefault() const { - return typename NearestFieldType::Type(); + return NearestFieldType(); } template void DataTypeNumberBase::serializeBinary(const Field & field, WriteBuffer & ostr) const { /// ColumnVector::value_type is a narrower type. For example, UInt8, when the Field type is UInt64 - typename ColumnVector::value_type x = get::Type>(field); + typename ColumnVector::value_type x = get>(field); writeBinary(x, ostr); } @@ -160,7 +160,7 @@ void DataTypeNumberBase::deserializeBinary(Field & field, ReadBuffer & istr) { typename ColumnVector::value_type x; readBinary(x, istr); - field = typename NearestFieldType::Type(x); + field = NearestFieldType(x); } template diff --git a/dbms/src/DataTypes/DataTypeString.cpp b/dbms/src/DataTypes/DataTypeString.cpp index cd4df69aae..2023523aa4 100644 --- a/dbms/src/DataTypes/DataTypeString.cpp +++ b/dbms/src/DataTypes/DataTypeString.cpp @@ -187,17 +187,23 @@ void DataTypeString::deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, avg_chars_size = (avg_value_size_hint - sizeof(offsets[0])) * avg_value_size_hint_reserve_multiplier; } - try + size_t size_to_reserve = data.size() + std::ceil(limit * avg_chars_size); + + /// Never reserve for too big size. 
+ if (size_to_reserve < 256 * 1024 * 1024) { - data.reserve(data.size() + std::ceil(limit * avg_chars_size)); - } - catch (Exception & e) - { - e.addMessage( - "avg_value_size_hint = " + toString(avg_value_size_hint) - + ", avg_chars_size = " + toString(avg_chars_size) - + ", limit = " + toString(limit)); - throw; + try + { + data.reserve(size_to_reserve); + } + catch (Exception & e) + { + e.addMessage( + "(avg_value_size_hint = " + toString(avg_value_size_hint) + + ", avg_chars_size = " + toString(avg_chars_size) + + ", limit = " + toString(limit) + ")"); + throw; + } } offsets.reserve(offsets.size() + limit); diff --git a/dbms/src/DataTypes/DataTypeTuple.cpp b/dbms/src/DataTypes/DataTypeTuple.cpp index 35f7ea3365..58d0513173 100644 --- a/dbms/src/DataTypes/DataTypeTuple.cpp +++ b/dbms/src/DataTypes/DataTypeTuple.cpp @@ -397,6 +397,7 @@ void DataTypeTuple::deserializeBinaryBulkWithMultipleStreams( auto * tuple_state = checkAndGetTupleDeserializeState(state); settings.path.push_back(Substream::TupleElement); + settings.avg_value_size_hint = 0; for (const auto i : ext::range(0, ext::size(elems))) { settings.path.back().tuple_element_name = names[i]; diff --git a/dbms/src/DataTypes/tests/CMakeLists.txt b/dbms/src/DataTypes/tests/CMakeLists.txt index 6186c7dfef..c2afc6eb2f 100644 --- a/dbms/src/DataTypes/tests/CMakeLists.txt +++ b/dbms/src/DataTypes/tests/CMakeLists.txt @@ -1,10 +1,10 @@ set(SRCS ) add_executable (data_types_number_fixed data_types_number_fixed.cpp ${SRCS}) -target_link_libraries (data_types_number_fixed dbms) +target_link_libraries (data_types_number_fixed PRIVATE dbms) add_executable (data_type_string data_type_string.cpp ${SRCS}) -target_link_libraries (data_type_string dbms) +target_link_libraries (data_type_string PRIVATE dbms) add_executable (data_type_get_common_type data_type_get_common_type.cpp ${SRCS}) -target_link_libraries (data_type_get_common_type dbms gtest_main) +target_link_libraries (data_type_get_common_type PRIVATE dbms gtest_main) diff --git a/dbms/src/Databases/DatabaseOrdinary.cpp b/dbms/src/Databases/DatabaseOrdinary.cpp index 7bb4ae298e..e0fe4294e7 100644 --- a/dbms/src/Databases/DatabaseOrdinary.cpp +++ b/dbms/src/Databases/DatabaseOrdinary.cpp @@ -392,7 +392,7 @@ void DatabaseOrdinary::renameTable( catch (const Poco::Exception & e) { /// Better diagnostics. - throw Exception{e}; + throw Exception{Exception::CreateFromPoco, e}; } ASTPtr ast = getQueryFromMetadata(detail::getTableMetadataPath(metadata_path, table_name)); diff --git a/dbms/src/Databases/DatabasesCommon.cpp b/dbms/src/Databases/DatabasesCommon.cpp index 2617390fd6..2d8dbbc239 100644 --- a/dbms/src/Databases/DatabasesCommon.cpp +++ b/dbms/src/Databases/DatabasesCommon.cpp @@ -163,7 +163,7 @@ DatabaseWithOwnTablesBase::~DatabaseWithOwnTablesBase() { shutdown(); } - catch(...) + catch (...) 
{ tryLogCurrentException(__PRETTY_FUNCTION__); } diff --git a/dbms/src/Dictionaries/CacheDictionary.cpp b/dbms/src/Dictionaries/CacheDictionary.cpp index 154fb6759f..41950b443f 100644 --- a/dbms/src/Dictionaries/CacheDictionary.cpp +++ b/dbms/src/Dictionaries/CacheDictionary.cpp @@ -456,7 +456,7 @@ CacheDictionary::Attribute CacheDictionary::createAttributeWithType(const Attrib { #define DISPATCH(TYPE) \ case AttributeUnderlyingType::TYPE: \ - attr.null_values = TYPE(null_value.get::Type>()); \ + attr.null_values = TYPE(null_value.get>()); \ attr.arrays = std::make_unique>(size); \ bytes_allocated += size * sizeof(TYPE); \ break; diff --git a/dbms/src/Dictionaries/ComplexKeyCacheDictionary_createAttributeWithType.cpp b/dbms/src/Dictionaries/ComplexKeyCacheDictionary_createAttributeWithType.cpp index 91b74ef834..3bf10833a8 100644 --- a/dbms/src/Dictionaries/ComplexKeyCacheDictionary_createAttributeWithType.cpp +++ b/dbms/src/Dictionaries/ComplexKeyCacheDictionary_createAttributeWithType.cpp @@ -11,7 +11,7 @@ ComplexKeyCacheDictionary::Attribute ComplexKeyCacheDictionary::createAttributeW { #define DISPATCH(TYPE) \ case AttributeUnderlyingType::TYPE: \ - attr.null_values = TYPE(null_value.get::Type>()); \ + attr.null_values = TYPE(null_value.get>()); \ attr.arrays = std::make_unique>(size); \ bytes_allocated += size * sizeof(TYPE); \ break; diff --git a/dbms/src/Dictionaries/ComplexKeyHashedDictionary.cpp b/dbms/src/Dictionaries/ComplexKeyHashedDictionary.cpp index 5d65213840..7e52b572c9 100644 --- a/dbms/src/Dictionaries/ComplexKeyHashedDictionary.cpp +++ b/dbms/src/Dictionaries/ComplexKeyHashedDictionary.cpp @@ -421,7 +421,7 @@ void ComplexKeyHashedDictionary::calculateBytesAllocated() template void ComplexKeyHashedDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value) { - attribute.null_values = T(null_value.get::Type>()); + attribute.null_values = T(null_value.get>()); attribute.maps.emplace>(); } diff --git a/dbms/src/Dictionaries/DictionarySourceFactory.cpp b/dbms/src/Dictionaries/DictionarySourceFactory.cpp index 1da2268c1e..d2deb76983 100644 --- a/dbms/src/Dictionaries/DictionarySourceFactory.cpp +++ b/dbms/src/Dictionaries/DictionarySourceFactory.cpp @@ -155,8 +155,7 @@ DictionarySourcePtr DictionarySourceFactory::create( else if ("odbc" == source_type) { #if USE_POCO_SQLODBC || USE_POCO_DATAODBC - const auto & global_config = context.getConfigRef(); - BridgeHelperPtr bridge = std::make_shared>(global_config, context.getSettings().http_receive_timeout, config.getString(config_prefix + ".odbc.connection_string")); + BridgeHelperPtr bridge = std::make_shared>(context, context.getSettings().http_receive_timeout, config.getString(config_prefix + ".odbc.connection_string")); return std::make_unique(dict_struct, config, config_prefix + ".odbc", sample_block, context, bridge); #else throw Exception{"Dictionary source of type `odbc` is disabled because poco library was built without ODBC support.", diff --git a/dbms/src/Dictionaries/Embedded/RegionsHierarchy.cpp b/dbms/src/Dictionaries/Embedded/RegionsHierarchy.cpp index 978d7b9e49..71b5adc6af 100644 --- a/dbms/src/Dictionaries/Embedded/RegionsHierarchy.cpp +++ b/dbms/src/Dictionaries/Embedded/RegionsHierarchy.cpp @@ -10,6 +10,15 @@ #include +namespace DB +{ + namespace ErrorCodes + { + extern const int INCORRECT_DATA; + } +} + + RegionsHierarchy::RegionsHierarchy(IRegionsHierarchyDataSourcePtr data_source_) : data_source(data_source_) { @@ -49,7 +58,8 @@ void RegionsHierarchy::reload() if (region_entry.id > 
max_region_id) { if (region_entry.id > max_size) - throw DB::Exception("Region id is too large: " + DB::toString(region_entry.id) + ", should be not more than " + DB::toString(max_size)); + throw DB::Exception("Region id is too large: " + DB::toString(region_entry.id) + ", should be not more than " + DB::toString(max_size), + DB::ErrorCodes::INCORRECT_DATA); max_region_id = region_entry.id; diff --git a/dbms/src/Dictionaries/Embedded/RegionsNames.cpp b/dbms/src/Dictionaries/Embedded/RegionsNames.cpp index 4966f30455..2c48f1f18c 100644 --- a/dbms/src/Dictionaries/Embedded/RegionsNames.cpp +++ b/dbms/src/Dictionaries/Embedded/RegionsNames.cpp @@ -8,6 +8,14 @@ #include +namespace DB +{ + namespace ErrorCodes + { + extern const int INCORRECT_DATA; + } +} + RegionsNames::RegionsNames(IRegionsNamesDataProviderPtr data_provider) { @@ -78,7 +86,8 @@ void RegionsNames::reload() max_region_id = name_entry.id; if (name_entry.id > max_size) - throw DB::Exception("Region id is too large: " + DB::toString(name_entry.id) + ", should be not more than " + DB::toString(max_size)); + throw DB::Exception("Region id is too large: " + DB::toString(name_entry.id) + ", should be not more than " + DB::toString(max_size), + DB::ErrorCodes::INCORRECT_DATA); } while (name_entry.id >= new_names_refs.size()) diff --git a/dbms/src/Dictionaries/ExecutableDictionarySource.cpp b/dbms/src/Dictionaries/ExecutableDictionarySource.cpp index 8dae4cfce2..c17ce3e246 100644 --- a/dbms/src/Dictionaries/ExecutableDictionarySource.cpp +++ b/dbms/src/Dictionaries/ExecutableDictionarySource.cpp @@ -213,7 +213,7 @@ bool ExecutableDictionarySource::supportsSelectiveLoad() const bool ExecutableDictionarySource::hasUpdateField() const { - if(update_field.empty()) + if (update_field.empty()) return false; else return true; diff --git a/dbms/src/Dictionaries/FlatDictionary.cpp b/dbms/src/Dictionaries/FlatDictionary.cpp index 4c86da1ba8..47a80ce8bc 100644 --- a/dbms/src/Dictionaries/FlatDictionary.cpp +++ b/dbms/src/Dictionaries/FlatDictionary.cpp @@ -440,7 +440,7 @@ void FlatDictionary::calculateBytesAllocated() template void FlatDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value) { - attribute.null_values = T(null_value.get::Type>()); + attribute.null_values = T(null_value.get>()); const auto & null_value_ref = std::get(attribute.null_values); attribute.arrays.emplace>(initial_array_size, null_value_ref); } diff --git a/dbms/src/Dictionaries/HashedDictionary.cpp b/dbms/src/Dictionaries/HashedDictionary.cpp index e453d907fe..737bef6b01 100644 --- a/dbms/src/Dictionaries/HashedDictionary.cpp +++ b/dbms/src/Dictionaries/HashedDictionary.cpp @@ -430,7 +430,7 @@ void HashedDictionary::calculateBytesAllocated() template void HashedDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value) { - attribute.null_values = T(null_value.get::Type>()); + attribute.null_values = T(null_value.get>()); attribute.maps = std::make_unique>(); } diff --git a/dbms/src/Dictionaries/RangeHashedDictionary.cpp b/dbms/src/Dictionaries/RangeHashedDictionary.cpp index 54926e75f1..9aeea8eff7 100644 --- a/dbms/src/Dictionaries/RangeHashedDictionary.cpp +++ b/dbms/src/Dictionaries/RangeHashedDictionary.cpp @@ -261,7 +261,7 @@ void RangeHashedDictionary::calculateBytesAllocated() template void RangeHashedDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value) { - attribute.null_values = T(null_value.get::Type>()); + attribute.null_values = T(null_value.get>()); attribute.maps = std::make_unique>(); 
} diff --git a/dbms/src/Dictionaries/TrieDictionary.cpp b/dbms/src/Dictionaries/TrieDictionary.cpp index 1efbe8c2d7..615a8df2ae 100644 --- a/dbms/src/Dictionaries/TrieDictionary.cpp +++ b/dbms/src/Dictionaries/TrieDictionary.cpp @@ -352,7 +352,7 @@ void TrieDictionary::validateKeyTypes(const DataTypes & key_types) const template void TrieDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value) { - attribute.null_values = T(null_value.get::Type>()); + attribute.null_values = T(null_value.get>()); attribute.maps.emplace>(); } diff --git a/dbms/src/Formats/CapnProtoRowInputStream.cpp b/dbms/src/Formats/CapnProtoRowInputStream.cpp index 8ed9d882a2..1af37a85ce 100644 --- a/dbms/src/Formats/CapnProtoRowInputStream.cpp +++ b/dbms/src/Formats/CapnProtoRowInputStream.cpp @@ -7,9 +7,9 @@ #include // Y_IGNORE #include #include - #include // Y_IGNORE #include // Y_IGNORE +#include // Y_IGNORE #include #include #include @@ -18,6 +18,13 @@ namespace DB { +namespace ErrorCodes +{ + extern const int BAD_TYPE_OF_FIELD; + extern const int BAD_ARGUMENTS; + extern const int THERE_IS_NO_COLUMN; +} + static String getSchemaPath(const String & schema_dir, const String & schema_file) { return schema_dir + escapeForFileName(schema_file) + ".capnp"; @@ -42,7 +49,7 @@ Field convertNodeToField(capnp::DynamicValue::Reader value) switch (value.getType()) { case capnp::DynamicValue::UNKNOWN: - throw Exception("Unknown field type"); + throw Exception("Unknown field type", ErrorCodes::BAD_TYPE_OF_FIELD); case capnp::DynamicValue::VOID: return Field(); case capnp::DynamicValue::BOOL: @@ -87,9 +94,9 @@ Field convertNodeToField(capnp::DynamicValue::Reader value) return field; } case capnp::DynamicValue::CAPABILITY: - throw Exception("CAPABILITY type not supported"); + throw Exception("CAPABILITY type not supported", ErrorCodes::BAD_TYPE_OF_FIELD); case capnp::DynamicValue::ANY_POINTER: - throw Exception("ANY_POINTER type not supported"); + throw Exception("ANY_POINTER type not supported", ErrorCodes::BAD_TYPE_OF_FIELD); } return Field(); } @@ -99,7 +106,7 @@ capnp::StructSchema::Field getFieldOrThrow(capnp::StructSchema node, const std:: KJ_IF_MAYBE(child, node.findFieldByName(field)) return *child; else - throw Exception("Field " + field + " doesn't exist in schema " + node.getShortDisplayName().cStr()); + throw Exception("Field " + field + " doesn't exist in schema " + node.getShortDisplayName().cStr(), ErrorCodes::THERE_IS_NO_COLUMN); } void CapnProtoRowInputStream::createActions(const NestedFieldList & sortedFields, capnp::StructSchema reader) @@ -135,7 +142,7 @@ void CapnProtoRowInputStream::createActions(const NestedFieldList & sortedFields break; // Collect list } else - throw Exception("Field " + field.tokens[level] + "is neither Struct nor List"); + throw Exception("Field " + field.tokens[level] + "is neither Struct nor List", ErrorCodes::BAD_TYPE_OF_FIELD); } // Read field from the structure @@ -214,7 +221,12 @@ bool CapnProtoRowInputStream::read(MutableColumns & columns) array = heap_array.asPtr(); } + +#if CAPNP_VERSION >= 8000 capnp::UnalignedFlatArrayMessageReader msg(array); +#else + capnp::FlatArrayMessageReader msg(array); +#endif std::vector stack; stack.push_back(msg.getRoot(root)); @@ -285,7 +297,8 @@ void registerInputFormatCapnProto(FormatFactory & factory) auto schema_and_root = context.getSettingsRef().format_schema.toString(); boost::split(tokens, schema_and_root, boost::is_any_of(":")); if (tokens.size() != 2) - throw Exception("Format CapnProto requires 'format_schema' 
setting to have a schema_file:root_object format, e.g. 'schema.capnp:Message'"); + throw Exception("Format CapnProto requires 'format_schema' setting to have a schema_file:root_object format, e.g. 'schema.capnp:Message'", + ErrorCodes::BAD_ARGUMENTS); const String & schema_dir = context.getFormatSchemaPath(); diff --git a/dbms/src/Formats/tests/CMakeLists.txt b/dbms/src/Formats/tests/CMakeLists.txt index de94fb4d4f..e12fa0f02f 100644 --- a/dbms/src/Formats/tests/CMakeLists.txt +++ b/dbms/src/Formats/tests/CMakeLists.txt @@ -1,7 +1,7 @@ set(SRCS ) add_executable (tab_separated_streams tab_separated_streams.cpp ${SRCS}) -target_link_libraries (tab_separated_streams dbms) +target_link_libraries (tab_separated_streams PRIVATE dbms) add_executable (block_row_transforms block_row_transforms.cpp ${SRCS}) -target_link_libraries (block_row_transforms dbms) +target_link_libraries (block_row_transforms PRIVATE dbms) diff --git a/dbms/src/Functions/CMakeLists.txt b/dbms/src/Functions/CMakeLists.txt index d57e399090..95268580dd 100644 --- a/dbms/src/Functions/CMakeLists.txt +++ b/dbms/src/Functions/CMakeLists.txt @@ -52,7 +52,7 @@ list(REMOVE_ITEM clickhouse_functions_headers IFunction.h FunctionFactory.h Func add_library(clickhouse_functions ${LINK_MODE} ${clickhouse_functions_sources}) -target_link_libraries(clickhouse_functions PUBLIC dbms PRIVATE ${CONSISTENT_HASHING_LIBRARY} consistent-hashing-sumbur ${FARMHASH_LIBRARIES} ${METROHASH_LIBRARIES} murmurhash) +target_link_libraries(clickhouse_functions PUBLIC dbms PRIVATE ${CONSISTENT_HASHING_LIBRARY} consistent-hashing-sumbur ${FARMHASH_LIBRARIES} ${METROHASH_LIBRARIES} murmurhash ${BASE64_LIBRARY}) target_include_directories (clickhouse_functions SYSTEM BEFORE PUBLIC ${DIVIDE_INCLUDE_DIR}) @@ -81,3 +81,7 @@ endif () if (USE_EMBEDDED_COMPILER) target_include_directories (clickhouse_functions SYSTEM BEFORE PUBLIC ${LLVM_INCLUDE_DIRS}) endif () + +if (USE_BASE64) + target_include_directories (clickhouse_functions SYSTEM PRIVATE ${BASE64_INCLUDE_DIR}) +endif() diff --git a/dbms/src/Functions/FunctionArrayMapped.h b/dbms/src/Functions/FunctionArrayMapped.h index c23e756d4d..f14ac244a9 100644 --- a/dbms/src/Functions/FunctionArrayMapped.h +++ b/dbms/src/Functions/FunctionArrayMapped.h @@ -9,6 +9,7 @@ #include #include #include +#include namespace DB @@ -70,7 +71,7 @@ public: if (!array_type) throw Exception("Argument " + toString(i + 2) + " of function " + getName() + " must be array. Found " + arguments[i + 1]->getName() + " instead.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - nested_types[i] = array_type->getNestedType(); + nested_types[i] = removeLowCardinality(array_type->getNestedType()); } const DataTypeFunction * function_type = checkAndGetDataType(arguments[0].get()); @@ -121,7 +122,7 @@ public: /// The types of the remaining arguments are already checked in getLambdaArgumentTypes. 
- DataTypePtr return_type = data_type_function->getReturnType(); + DataTypePtr return_type = removeLowCardinality(data_type_function->getReturnType()); if (Impl::needBoolean() && !WhichDataType(return_type).isUInt8()) throw Exception("Expression for function " + getName() + " must return UInt8, found " + return_type->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); @@ -184,6 +185,8 @@ public: if (!column_const_array) throw Exception("Expected array column, found " + column_array_ptr->getName(), ErrorCodes::ILLEGAL_COLUMN); column_array_ptr = column_const_array->convertToFullColumn(); + if (column_array_ptr->lowCardinality()) + column_array_ptr = column_array_ptr->convertToFullColumnIfLowCardinality(); column_array = checkAndGetColumn(column_array_ptr.get()); } @@ -209,7 +212,8 @@ public: } arrays.emplace_back(ColumnWithTypeAndName(column_array->getDataPtr(), - array_type->getNestedType(), array_with_type_and_name.name)); + removeLowCardinality(array_type->getNestedType()), + array_with_type_and_name.name)); } /// Put all the necessary columns multiplied by the sizes of arrays into the block. @@ -217,8 +221,11 @@ public: auto * replicated_column_function = typeid_cast(replicated_column_function_ptr.get()); replicated_column_function->appendArguments(arrays); - block.getByPosition(result).column = Impl::execute(*column_first_array, - replicated_column_function->reduce().column); + auto lambda_result = replicated_column_function->reduce().column; + if (lambda_result->lowCardinality()) + lambda_result = lambda_result->convertToFullColumnIfLowCardinality(); + + block.getByPosition(result).column = Impl::execute(*column_first_array, lambda_result); } } }; diff --git a/dbms/src/Functions/FunctionBase64Conversion.h b/dbms/src/Functions/FunctionBase64Conversion.h new file mode 100644 index 0000000000..3afa65c8ac --- /dev/null +++ b/dbms/src/Functions/FunctionBase64Conversion.h @@ -0,0 +1,169 @@ +#include +#if USE_BASE64 +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ +using namespace GatherUtils; + +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int INCORRECT_DATA; +} + +struct Base64Encode +{ + static constexpr auto name = "base64Encode"; + static size_t getBufferSize(size_t string_length, size_t string_count) + { + return ( ( string_length - string_count ) / 3 + string_count ) * 4 + string_count ; + } +}; + +struct Base64Decode +{ + static constexpr auto name = "base64Decode"; + + static size_t getBufferSize(size_t string_length, size_t string_count) + { + return ( ( string_length - string_count) / 4 + string_count) * 3 + string_count; + } +}; + +struct TryBase64Decode +{ + static constexpr auto name = "tryBase64Decode"; + + static size_t getBufferSize(size_t string_length, size_t string_count) + { + return Base64Decode::getBufferSize(string_length, string_count); + } +}; + +template +class FunctionBase64Conversion : public IFunction +{ +public: + static constexpr auto name = Func::name; + + static FunctionPtr create(const Context &) + { + return std::make_shared(); + } + + String getName() const override + { + return Func::name; + } + + size_t getNumberOfArguments() const override + { + return 1; + } + + bool useDefaultImplementationForConstants() const override + { + return true; + } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + if 
(!WhichDataType(arguments[0].type).isString()) + throw Exception( + "Illegal type " + arguments[0].type->getName() + " of 1 argument of function " + getName() + ". Must be String.", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + return std::make_shared(); + } + + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override + { + const ColumnPtr column_string = block.getByPosition(arguments[0]).column; + const ColumnString * input = checkAndGetColumn(column_string.get()); + + if (!input) + throw Exception( + "Illegal column " + block.getByPosition(arguments[0]).column->getName() + " of first argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); + + auto dst_column = ColumnString::create(); + auto & dst_data = dst_column->getChars(); + auto & dst_offsets = dst_column->getOffsets(); + + size_t reserve = Func::getBufferSize(input->getChars().size(), input->size()); + dst_data.resize(reserve); + dst_offsets.resize(input_rows_count); + + const ColumnString::Offsets & src_offsets = input->getOffsets(); + + auto source = reinterpret_cast(input->getChars().data()); + auto dst = reinterpret_cast(dst_data.data()); + auto dst_pos = dst; + + size_t src_offset_prev = 0; + + int codec = getCodec(); + for (size_t row = 0; row < input_rows_count; ++row) + { + size_t srclen = src_offsets[row] - src_offset_prev - 1; + size_t outlen = 0; + + if constexpr (std::is_same_v) + { + base64_encode(source, srclen, dst_pos, &outlen, codec); + } + else if constexpr (std::is_same_v) + { + if (!base64_decode(source, srclen, dst_pos, &outlen, codec)) + { + throw Exception("Failed to " + getName() + " input '" + String(source, srclen) + "'", ErrorCodes::INCORRECT_DATA); + } + } + else + { + // during decoding character array can be partially polluted + // if fail, revert back and clean + auto savepoint = dst_pos; + if (!base64_decode(source, srclen, dst_pos, &outlen, codec)) + { + outlen = 0; + dst_pos = savepoint; + // clean the symbol + dst_pos[0] = 0; + } + } + + source += srclen + 1; + dst_pos += outlen + 1; + + dst_offsets[row] = dst_pos - dst; + src_offset_prev = src_offsets[row]; + } + + dst_data.resize(dst_pos - dst); + + block.getByPosition(result).column = std::move(dst_column); + } + +private: + static int getCodec() + { + /// You can provide different value if you want to test specific codecs. + /// Due to poor implementation of "base64" library (it will write to a global variable), + /// it doesn't scale for multiple threads. Never use non-zero values in production. 
+ return 0; + } +}; +} +#endif diff --git a/dbms/src/Functions/FunctionHelpers.h b/dbms/src/Functions/FunctionHelpers.h index 88737ca86d..6be58f32c5 100644 --- a/dbms/src/Functions/FunctionHelpers.h +++ b/dbms/src/Functions/FunctionHelpers.h @@ -73,13 +73,13 @@ const ColumnConst * checkAndGetColumnConstStringOrFixedString(const IColumn * co template inline std::enable_if_t, Field> toField(const T & x) { - return Field(typename NearestFieldType::Type(x)); + return Field(NearestFieldType(x)); } template inline std::enable_if_t, Field> toField(const T & x, UInt32 scale) { - return Field(typename NearestFieldType::Type(x, scale)); + return Field(NearestFieldType(x, scale)); } diff --git a/dbms/src/Functions/FunctionsArithmetic.h b/dbms/src/Functions/FunctionsArithmetic.h index ab0b5a1ec0..7589d7b912 100644 --- a/dbms/src/Functions/FunctionsArithmetic.h +++ b/dbms/src/Functions/FunctionsArithmetic.h @@ -1238,7 +1238,7 @@ class FunctionBinaryArithmetic : public IFunction auto column_to = ColumnAggregateFunction::create(columns[0]->getAggregateFunction()); column_to->reserve(input_rows_count); - for(size_t i = 0; i < input_rows_count; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { column_to->insertFrom(columns[0]->getData()[i]); column_to->insertMergeFrom(columns[1]->getData()[i]); diff --git a/dbms/src/Functions/FunctionsComparison.h b/dbms/src/Functions/FunctionsComparison.h index 15d253e778..d785cf3071 100644 --- a/dbms/src/Functions/FunctionsComparison.h +++ b/dbms/src/Functions/FunctionsComparison.h @@ -38,6 +38,15 @@ namespace DB { +namespace ErrorCodes +{ + extern const int TOO_LARGE_STRING_SIZE; + extern const int ILLEGAL_COLUMN; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int LOGICAL_ERROR; + extern const int NOT_IMPLEMENTED; +} + /** Comparison functions: ==, !=, <, >, <=, >=. * The comparison functions always return 0 or 1 (UInt8). 
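The getBufferSize helpers in the new FunctionBase64Conversion.h above size the whole output ColumnString up front, counting one zero terminator per value, before handing the buffers to the aklomp base64 codec. A minimal standalone sketch, with encodeBufferSize as an illustrative stand-in for Base64Encode::getBufferSize (not part of the patch), checking that the encode bound always covers ceil(n / 3) * 4 output bytes plus the per-value terminator:

#include <cassert>
#include <cstddef>

// Mirrors the shape of Base64Encode::getBufferSize from the patch: string_length counts
// payload bytes plus one zero terminator per value, string_count is the number of values.
static size_t encodeBufferSize(size_t string_length, size_t string_count)
{
    return ((string_length - string_count) / 3 + string_count) * 4 + string_count;
}

int main()
{
    // A single value of n payload bytes needs ceil(n / 3) * 4 bytes of base64 output,
    // plus 1 byte for the terminating zero that ColumnString appends to every value.
    for (size_t n = 0; n < 1000; ++n)
    {
        size_t needed = (n + 2) / 3 * 4 + 1;
        assert(encodeBufferSize(n + 1, 1) >= needed);
    }
    return 0;
}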
@@ -833,7 +842,7 @@ private: ReadBufferFromMemory in(string_value.data, string_value.size); readDateText(date, in); if (!in.eof()) - throw Exception("String is too long for Date: " + string_value.toString()); + throw Exception("String is too long for Date: " + string_value.toString(), ErrorCodes::TOO_LARGE_STRING_SIZE); ColumnPtr parsed_const_date_holder = DataTypeDate().createColumnConst(input_rows_count, date); const ColumnConst * parsed_const_date = static_cast(parsed_const_date_holder.get()); @@ -847,7 +856,7 @@ private: ReadBufferFromMemory in(string_value.data, string_value.size); readDateTimeText(date_time, in); if (!in.eof()) - throw Exception("String is too long for DateTime: " + string_value.toString()); + throw Exception("String is too long for DateTime: " + string_value.toString(), ErrorCodes::TOO_LARGE_STRING_SIZE); ColumnPtr parsed_const_date_time_holder = DataTypeDateTime().createColumnConst(input_rows_count, UInt64(date_time)); const ColumnConst * parsed_const_date_time = static_cast(parsed_const_date_time_holder.get()); @@ -861,7 +870,7 @@ private: ReadBufferFromMemory in(string_value.data, string_value.size); readText(uuid, in); if (!in.eof()) - throw Exception("String is too long for UUID: " + string_value.toString()); + throw Exception("String is too long for UUID: " + string_value.toString(), ErrorCodes::TOO_LARGE_STRING_SIZE); ColumnPtr parsed_const_uuid_holder = DataTypeUUID().createColumnConst(input_rows_count, uuid); const ColumnConst * parsed_const_uuid = static_cast(parsed_const_uuid_holder.get()); diff --git a/dbms/src/Functions/FunctionsExternalDictionaries.cpp b/dbms/src/Functions/FunctionsExternalDictionaries.cpp index 89276d6156..b28b7901ee 100644 --- a/dbms/src/Functions/FunctionsExternalDictionaries.cpp +++ b/dbms/src/Functions/FunctionsExternalDictionaries.cpp @@ -37,6 +37,8 @@ void registerFunctionsExternalDictionaries(FunctionFactory & factory) factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); } } diff --git a/dbms/src/Functions/FunctionsExternalDictionaries.h b/dbms/src/Functions/FunctionsExternalDictionaries.h index 73d4375f88..86f463ac0b 100644 --- a/dbms/src/Functions/FunctionsExternalDictionaries.h +++ b/dbms/src/Functions/FunctionsExternalDictionaries.h @@ -1145,6 +1145,219 @@ using FunctionDictGetDateOrDefault = FunctionDictGetOrDefault; using FunctionDictGetUUIDOrDefault = FunctionDictGetOrDefault; +#define FOR_DICT_TYPES(M) \ + M(UInt8) \ + M(UInt16) \ + M(UInt32) \ + M(UInt64) \ + M(Int8) \ + M(Int16) \ + M(Int32) \ + M(Int64) \ + M(Float32) \ + M(Float64) \ + M(Date) \ + M(DateTime) \ + M(UUID) + +/// This variant of function derives the result type automatically. 
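The FunctionDictGetNoType class introduced below resolves the attribute type at query time and forwards to the matching typed implementation through the FOR_DICT_TYPES / DISPATCH macro pair. A toy sketch of that X-macro dispatch shape, with made-up names (TypeTag, pickImplementation, FOR_EXAMPLE_TYPES) standing in for TypeIndex and the real function factory:

#include <iostream>
#include <string>

// Simplified stand-in for TypeIndex in the real code.
enum class TypeTag { UInt8, UInt64, Float64, String };

#define FOR_EXAMPLE_TYPES(M) \
    M(UInt8) \
    M(UInt64) \
    M(Float64)

// Picks a handler by runtime tag; same shape as the DISPATCH expansion
// inside FunctionDictGetNoType::getReturnTypeImpl.
std::string pickImplementation(TypeTag tag)
{
    if (tag == TypeTag::String)
        return "FunctionDictGetString";
#define DISPATCH(TYPE) \
    else if (tag == TypeTag::TYPE) \
        return "FunctionDictGet" #TYPE;
    FOR_EXAMPLE_TYPES(DISPATCH)
#undef DISPATCH
    else
        return "unknown";
}

int main()
{
    std::cout << pickImplementation(TypeTag::UInt64) << '\n';  // FunctionDictGetUInt64
    std::cout << pickImplementation(TypeTag::String) << '\n';  // FunctionDictGetString
}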
+class FunctionDictGetNoType final : public IFunction +{ +public: + static constexpr auto name = "dictGet"; + + static FunctionPtr create(const Context & context) + { + return std::make_shared(context.getExternalDictionaries(), context); + } + + FunctionDictGetNoType(const ExternalDictionaries & dictionaries, const Context & context) : dictionaries(dictionaries), context(context) {} + + String getName() const override { return name; } + +private: + bool isVariadic() const override { return true; } + size_t getNumberOfArguments() const override { return 0; } + + bool useDefaultImplementationForConstants() const final { return true; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const final { return {0, 1}; } + + bool isInjective(const Block & sample_block) override + { + return isDictGetFunctionInjective(dictionaries, sample_block); + } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + if (arguments.size() != 3 && arguments.size() != 4) + throw Exception{"Function " + getName() + " takes 3 or 4 arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH}; + + String dict_name; + if (auto name_col = checkAndGetColumnConst(arguments[0].column.get())) + { + dict_name = name_col->getValue(); + } + else + throw Exception{"Illegal type " + arguments[0].type->getName() + " of first argument of function " + getName() + + ", expected a const string.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + + String attr_name; + if (auto name_col = checkAndGetColumnConst(arguments[1].column.get())) + { + attr_name = name_col->getValue(); + } + else + throw Exception{"Illegal type " + arguments[1].type->getName() + " of second argument of function " + getName() + + ", expected a const string.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + + if (!WhichDataType(arguments[2].type).isUInt64() && + !isTuple(arguments[2].type)) + throw Exception{"Illegal type " + arguments[2].type->getName() + " of third argument of function " + getName() + + ", must be UInt64 or tuple(...).", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + + if (arguments.size() == 4 ) + { + const auto range_argument = arguments[3].type.get(); + if (!(range_argument->isValueRepresentedByInteger() && + range_argument->getSizeOfValueInMemory() <= sizeof(Int64))) + throw Exception{"Illegal type " + range_argument->getName() + " of fourth argument of function " + getName() + + ", must be convertible to " + TypeName::get() + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + } + + auto dict = dictionaries.getDictionary(dict_name); + const DictionaryStructure & structure = dict->getStructure(); + + for (const auto idx : ext::range(0, structure.attributes.size())) + { + const DictionaryAttribute & attribute = structure.attributes[idx]; + if (attribute.name == attr_name) + { + WhichDataType dt = attribute.type; + if (dt.idx == TypeIndex::String) + impl = FunctionDictGetString::create(context); +#define DISPATCH(TYPE) \ + else if (dt.idx == TypeIndex::TYPE) \ + impl = FunctionDictGet::create(context); + FOR_DICT_TYPES(DISPATCH) +#undef DISPATCH + else + throw Exception("Unknown dictGet type", ErrorCodes::UNKNOWN_TYPE); + return attribute.type; + } + } + throw Exception{"No such attribute '" + attr_name + "'", ErrorCodes::BAD_ARGUMENTS}; + } + + bool isDeterministic() const override { return false; } + + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override + { + impl->executeImpl(block, arguments, result, input_rows_count); + } + +private: + const ExternalDictionaries & 
dictionaries; + const Context & context; + mutable FunctionPtr impl; // underlying function used by dictGet function without explicit type info +}; + + +class FunctionDictGetNoTypeOrDefault final : public IFunction +{ +public: + static constexpr auto name = "dictGetOrDefault"; + + static FunctionPtr create(const Context & context) + { + return std::make_shared(context.getExternalDictionaries(), context); + } + + FunctionDictGetNoTypeOrDefault(const ExternalDictionaries & dictionaries, const Context & context) : dictionaries(dictionaries), context(context) {} + + String getName() const override { return name; } + +private: + size_t getNumberOfArguments() const override { return 4; } + + bool useDefaultImplementationForConstants() const final { return true; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const final { return {0, 1}; } + + bool isInjective(const Block & sample_block) override + { + return isDictGetFunctionInjective(dictionaries, sample_block); + } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + String dict_name; + if (auto name_col = checkAndGetColumnConst(arguments[0].column.get())) + { + dict_name = name_col->getValue(); + } + else + throw Exception{"Illegal type " + arguments[0].type->getName() + " of first argument of function " + getName() + + ", expected a const string.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + + String attr_name; + if (auto name_col = checkAndGetColumnConst(arguments[1].column.get())) + { + attr_name = name_col->getValue(); + } + else + throw Exception{"Illegal type " + arguments[1].type->getName() + " of second argument of function " + getName() + + ", expected a const string.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + + if (!WhichDataType(arguments[2].type).isUInt64() && + !isTuple(arguments[2].type)) + throw Exception{"Illegal type " + arguments[2].type->getName() + " of third argument of function " + getName() + + ", must be UInt64 or tuple(...).", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + + auto dict = dictionaries.getDictionary(dict_name); + const DictionaryStructure & structure = dict->getStructure(); + + for (const auto idx : ext::range(0, structure.attributes.size())) + { + const DictionaryAttribute & attribute = structure.attributes[idx]; + if (attribute.name == attr_name) + { + WhichDataType dt = attribute.type; + if (dt.idx == TypeIndex::String) + { + if (!isString(arguments[3].type)) + throw Exception{"Illegal type " + arguments[3].type->getName() + " of fourth argument of function " + getName() + + ", must be String.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + impl = FunctionDictGetStringOrDefault::create(context); + } +#define DISPATCH(TYPE) \ + else if (dt.idx == TypeIndex::TYPE) \ + { \ + if (!checkAndGetDataType(arguments[3].type.get())) \ + throw Exception{"Illegal type " + arguments[3].type->getName() + " of fourth argument of function " + getName() \ + + ", must be " + String(DataType##TYPE{}.getFamilyName()) + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; \ + impl = FunctionDictGetOrDefault::create(context); \ + } + FOR_DICT_TYPES(DISPATCH) +#undef DISPATCH + else + throw Exception("Unknown dictGetOrDefault type", ErrorCodes::UNKNOWN_TYPE); + return attribute.type; + } + } + throw Exception{"No such attribute '" + attr_name + "'", ErrorCodes::BAD_ARGUMENTS}; + } + + bool isDeterministic() const override { return false; } + + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override + { + impl->executeImpl(block, arguments, 
result, input_rows_count); + } + +private: + const ExternalDictionaries & dictionaries; + const Context & context; + mutable FunctionPtr impl; // underlying function used by dictGet function without explicit type info +}; /// Functions to work with hierarchies. diff --git a/dbms/src/Functions/FunctionsFindCluster.h b/dbms/src/Functions/FunctionsFindCluster.h index dcd76915e8..9e7e43c7dd 100644 --- a/dbms/src/Functions/FunctionsFindCluster.h +++ b/dbms/src/Functions/FunctionsFindCluster.h @@ -185,7 +185,7 @@ protected: for (size_t k = 0; k < array.size(); ++k) { const Field & tmp_field = array[k]; - typename NearestFieldType::Type value; + NearestFieldType value; if (!tmp_field.tryGet(value)) return false; diff --git a/dbms/src/Functions/FunctionsGeo.h b/dbms/src/Functions/FunctionsGeo.h index 456198b9e0..5ac23138a8 100644 --- a/dbms/src/Functions/FunctionsGeo.h +++ b/dbms/src/Functions/FunctionsGeo.h @@ -20,6 +20,8 @@ namespace DB namespace ErrorCodes { extern const int ARGUMENT_OUT_OF_BOUND; + extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int ILLEGAL_COLUMN; extern const int LOGICAL_ERROR; } @@ -200,14 +202,15 @@ private: if (arguments.size() < 6 || arguments.size() % 4 != 2) { throw Exception( - "Incorrect number of arguments of function " + getName() + ". Must be 2 for your point plus 4 * N for ellipses (x_i, y_i, a_i, b_i)."); + "Incorrect number of arguments of function " + getName() + ". Must be 2 for your point plus 4 * N for ellipses (x_i, y_i, a_i, b_i).", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); } /// For array on stack, see below. if (arguments.size() > 10000) { throw Exception( - "Number of arguments of function " + getName() + " is too large."); + "Number of arguments of function " + getName() + " is too large.", ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION); } for (const auto arg_idx : ext::range(0, arguments.size())) diff --git a/dbms/src/Functions/FunctionsStringArray.h b/dbms/src/Functions/FunctionsStringArray.h index 615e9cdb7c..75b27751fc 100644 --- a/dbms/src/Functions/FunctionsStringArray.h +++ b/dbms/src/Functions/FunctionsStringArray.h @@ -145,7 +145,7 @@ public: String sep_str = col->getValue(); if (sep_str.size() != 1) - throw Exception("Illegal separator for function " + getName() + ". Must be exactly one byte."); + throw Exception("Illegal separator for function " + getName() + ". 
Must be exactly one byte.", ErrorCodes::BAD_ARGUMENTS); sep = sep_str[0]; } diff --git a/dbms/src/Functions/FunctionsStringSearch.cpp b/dbms/src/Functions/FunctionsStringSearch.cpp index 243120d767..584a625039 100644 --- a/dbms/src/Functions/FunctionsStringSearch.cpp +++ b/dbms/src/Functions/FunctionsStringSearch.cpp @@ -26,6 +26,7 @@ namespace DB namespace ErrorCodes { extern const int BAD_ARGUMENTS; + extern const int ILLEGAL_COLUMN; } @@ -976,7 +977,7 @@ public: const ColumnPtr column_replacement = block.getByPosition(arguments[2]).column; if (!column_needle->isColumnConst() || !column_replacement->isColumnConst()) - throw Exception("2nd and 3rd arguments of function " + getName() + " must be constants."); + throw Exception("2nd and 3rd arguments of function " + getName() + " must be constants.", ErrorCodes::ILLEGAL_COLUMN); const IColumn * c1 = block.getByPosition(arguments[1]).column.get(); const IColumn * c2 = block.getByPosition(arguments[2]).column.get(); diff --git a/dbms/src/Functions/FunctionsURL.h b/dbms/src/Functions/FunctionsURL.h index c1b576e15f..f66b428e4a 100644 --- a/dbms/src/Functions/FunctionsURL.h +++ b/dbms/src/Functions/FunctionsURL.h @@ -202,15 +202,28 @@ struct ExtractFirstSignificantSubdomain if (!last_3_periods[2]) last_3_periods[2] = begin - 1; - if (!strncmp(last_3_periods[1] + 1, "com.", 4) /// Note that in ColumnString every value has zero byte after it. - || !strncmp(last_3_periods[1] + 1, "net.", 4) - || !strncmp(last_3_periods[1] + 1, "org.", 4) - || !strncmp(last_3_periods[1] + 1, "co.", 3) - || !strncmp(last_3_periods[1] + 1, "biz.", 4)) + size_t size_of_second_subdomain_plus_period = last_3_periods[0] - last_3_periods[1]; + if (size_of_second_subdomain_plus_period == 4 || size_of_second_subdomain_plus_period == 3) { - res_data += last_3_periods[2] + 1 - begin; - res_size = last_3_periods[1] - last_3_periods[2] - 1; - return; + /// We will key by four bytes that are either ".xyz" or ".xy.". + UInt32 key = unalignedLoad(last_3_periods[1]); + + /// NOTE: assuming little endian. + /// NOTE: does the compiler generate SIMD code? + /// NOTE: for larger amount of cases we can use a perfect hash table (see 'gperf' as an example). + if ( key == '.' + 'c' * 0x100U + 'o' * 0x10000U + 'm' * 0x1000000U + || key == '.' + 'n' * 0x100U + 'e' * 0x10000U + 't' * 0x1000000U + || key == '.' + 'o' * 0x100U + 'r' * 0x10000U + 'g' * 0x1000000U + || key == '.' + 'b' * 0x100U + 'i' * 0x10000U + 'z' * 0x1000000U + || key == '.' + 'g' * 0x100U + 'o' * 0x10000U + 'v' * 0x1000000U + || key == '.' + 'm' * 0x100U + 'i' * 0x10000U + 'l' * 0x1000000U + || key == '.' + 'e' * 0x100U + 'd' * 0x10000U + 'u' * 0x1000000U + || key == '.' + 'c' * 0x100U + 'o' * 0x10000U + '.' 
* 0x1000000U) + { + res_data += last_3_periods[2] + 1 - begin; + res_size = last_3_periods[1] - last_3_periods[2] - 1; + return; + } } res_data += last_3_periods[1] + 1 - begin; diff --git a/dbms/src/Functions/FunctionsVisitParam.h b/dbms/src/Functions/FunctionsVisitParam.h index 82bf8946e7..13ae400223 100644 --- a/dbms/src/Functions/FunctionsVisitParam.h +++ b/dbms/src/Functions/FunctionsVisitParam.h @@ -104,7 +104,7 @@ struct ExtractRaw } else { - switch(*pos) + switch (*pos) { case '[': expects_end.push_back((current_expect_end = ']')); diff --git a/dbms/src/Functions/GatherUtils/IArraySink.h b/dbms/src/Functions/GatherUtils/IArraySink.h index a3f8433c75..e7a3da1d5b 100644 --- a/dbms/src/Functions/GatherUtils/IArraySink.h +++ b/dbms/src/Functions/GatherUtils/IArraySink.h @@ -1,8 +1,17 @@ #pragma once + #include #include -namespace DB::GatherUtils +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + +namespace GatherUtils { struct IArraySink @@ -11,7 +20,7 @@ struct IArraySink virtual void accept(ArraySinkVisitor &) { - throw Exception("Accept not implemented for " + demangle(typeid(*this).name())); + throw Exception("Accept not implemented for " + demangle(typeid(*this).name()), ErrorCodes::NOT_IMPLEMENTED); } }; @@ -19,3 +28,5 @@ template class ArraySinkImpl : public Visitable {}; } + +} diff --git a/dbms/src/Functions/GatherUtils/IArraySource.h b/dbms/src/Functions/GatherUtils/IArraySource.h index dd62395726..a33f23036c 100644 --- a/dbms/src/Functions/GatherUtils/IArraySource.h +++ b/dbms/src/Functions/GatherUtils/IArraySource.h @@ -3,7 +3,15 @@ #include #include -namespace DB::GatherUtils +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + +namespace GatherUtils { struct IArraySource @@ -18,7 +26,7 @@ struct IArraySource virtual void accept(ArraySourceVisitor &) { - throw Exception("Accept not implemented for " + demangle(typeid(*this).name())); + throw Exception("Accept not implemented for " + demangle(typeid(*this).name()), ErrorCodes::NOT_IMPLEMENTED); } }; @@ -26,3 +34,5 @@ template class ArraySourceImpl : public Visitable {}; } + +} diff --git a/dbms/src/Functions/GatherUtils/IValueSource.h b/dbms/src/Functions/GatherUtils/IValueSource.h index 9983a24e9f..bd26424e1b 100644 --- a/dbms/src/Functions/GatherUtils/IValueSource.h +++ b/dbms/src/Functions/GatherUtils/IValueSource.h @@ -1,8 +1,17 @@ #pragma once + #include #include -namespace DB::GatherUtils +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + +namespace GatherUtils { struct IValueSource @@ -11,7 +20,7 @@ struct IValueSource virtual void accept(ValueSourceVisitor &) { - throw Exception("Accept not implemented for " + demangle(typeid(*this).name())); + throw Exception("Accept not implemented for " + demangle(typeid(*this).name()), ErrorCodes::NOT_IMPLEMENTED); } virtual bool isConst() const { return false; } @@ -21,3 +30,5 @@ template class ValueSourceImpl : public Visitable {}; } + +} diff --git a/dbms/src/Functions/GatherUtils/Selectors.h b/dbms/src/Functions/GatherUtils/Selectors.h index 8b81d2f1db..a09fdb4105 100644 --- a/dbms/src/Functions/GatherUtils/Selectors.h +++ b/dbms/src/Functions/GatherUtils/Selectors.h @@ -179,7 +179,7 @@ struct ArrayAndValueSourceSelectorBySink : public ArraySinkSelector *>(&array_source), typeid_cast *>(&value_source)); - else if(array_source.isConst()) + else if (array_source.isConst()) checkTypeAndCallConcat(typeid_cast *>(&array_source), typeid_cast(&value_source)); else if 
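The FunctionsURL.h change above replaces a chain of strncmp calls with a single unaligned 4-byte load compared against integer constants, which, as the patch notes, assumes a little-endian target. A small standalone sketch (load4 is an illustrative stand-in for unalignedLoad<UInt32>, not from the patch) showing why the ".com" constant is spelled '.' + 'c' * 0x100U + 'o' * 0x10000U + 'm' * 0x1000000U:

#include <cassert>
#include <cstdint>
#include <cstring>

// Load 4 bytes the way an unaligned load does: memcpy avoids alignment problems,
// and the byte order of the result is whatever the host uses.
static uint32_t load4(const char * p)
{
    uint32_t x;
    std::memcpy(&x, p, sizeof(x));
    return x;
}

int main()
{
    const char * text = ".com.example";  // the first four bytes are ".com"

    // On a little-endian host the first byte lands in the low-order byte of the integer,
    // so the comparison constant is built up as '.' + 'c'*0x100 + 'o'*0x10000 + 'm'*0x1000000.
    uint32_t expected = '.' + 'c' * 0x100U + 'o' * 0x10000U + 'm' * 0x1000000U;
    assert(load4(text) == expected);  // holds on little-endian targets only
    return 0;
}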
(value_source.isConst()) diff --git a/dbms/src/Functions/GatherUtils/Sources.h b/dbms/src/Functions/GatherUtils/Sources.h index ccfeb25a21..16e78bea26 100644 --- a/dbms/src/Functions/GatherUtils/Sources.h +++ b/dbms/src/Functions/GatherUtils/Sources.h @@ -142,7 +142,7 @@ struct ConstSource : public Base else throw Exception( "accept(ArraySourceVisitor &) is not implemented for " + demangle(typeid(ConstSource).name()) - + " because " + demangle(typeid(Base).name()) + " is not derived from IArraySource "); + + " because " + demangle(typeid(Base).name()) + " is not derived from IArraySource", ErrorCodes::NOT_IMPLEMENTED); } virtual void accept(ValueSourceVisitor & visitor) // override @@ -152,7 +152,7 @@ struct ConstSource : public Base else throw Exception( "accept(ValueSourceVisitor &) is not implemented for " + demangle(typeid(ConstSource).name()) - + " because " + demangle(typeid(Base).name()) + " is not derived from IValueSource "); + + " because " + demangle(typeid(Base).name()) + " is not derived from IValueSource", ErrorCodes::NOT_IMPLEMENTED); } void next() diff --git a/dbms/src/Functions/arrayCumSum.cpp b/dbms/src/Functions/arrayCumSum.cpp index 756f57b5b7..5351ca745d 100644 --- a/dbms/src/Functions/arrayCumSum.cpp +++ b/dbms/src/Functions/arrayCumSum.cpp @@ -7,6 +7,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; +} + struct ArrayCumSumImpl { static bool needBoolean() { return false; } @@ -108,7 +113,7 @@ struct ArrayCumSumImpl executeType(mapped, array, res)) return res; else - throw Exception("Unexpected column for arrayCumSum: " + mapped->getName()); + throw Exception("Unexpected column for arrayCumSum: " + mapped->getName(), ErrorCodes::ILLEGAL_COLUMN); } }; diff --git a/dbms/src/Functions/arrayCumSumNonNegative.cpp b/dbms/src/Functions/arrayCumSumNonNegative.cpp index 4dc4c9d0ef..59c80cef85 100644 --- a/dbms/src/Functions/arrayCumSumNonNegative.cpp +++ b/dbms/src/Functions/arrayCumSumNonNegative.cpp @@ -7,6 +7,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; +} + /** arrayCumSumNonNegative() - returns an array with cumulative sums of the original. (If value < 0 -> 0). */ struct ArrayCumSumNonNegativeImpl @@ -88,7 +93,7 @@ struct ArrayCumSumNonNegativeImpl executeType(mapped, array, res)) return res; else - throw Exception("Unexpected column for arrayCumSumNonNegativeImpl: " + mapped->getName()); + throw Exception("Unexpected column for arrayCumSumNonNegativeImpl: " + mapped->getName(), ErrorCodes::ILLEGAL_COLUMN); } }; diff --git a/dbms/src/Functions/arrayDifference.cpp b/dbms/src/Functions/arrayDifference.cpp index 847d1929db..1228591aa0 100644 --- a/dbms/src/Functions/arrayDifference.cpp +++ b/dbms/src/Functions/arrayDifference.cpp @@ -7,6 +7,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; +} + /** arrayDifference() - returns an array with the difference between all pairs of neighboring elements. 
*/ struct ArrayDifferenceImpl @@ -85,7 +90,7 @@ struct ArrayDifferenceImpl executeType(mapped, array, res)) return res; else - throw Exception("Unexpected column for arrayDifference: " + mapped->getName()); + throw Exception("Unexpected column for arrayDifference: " + mapped->getName(), ErrorCodes::ILLEGAL_COLUMN); } }; diff --git a/dbms/src/Functions/arrayIntersect.cpp b/dbms/src/Functions/arrayIntersect.cpp index b646d5d89b..e5f1d4f0da 100644 --- a/dbms/src/Functions/arrayIntersect.cpp +++ b/dbms/src/Functions/arrayIntersect.cpp @@ -324,9 +324,9 @@ void FunctionArrayIntersect::executeImpl(Block & block, const ColumnNumbers & ar result_column = execute, true>(arrays, std::move(column)); else if (which.isDateTime()) result_column = execute, true>(arrays, std::move(column)); - else if(which.isString()) + else if (which.isString()) result_column = execute(arrays, std::move(column)); - else if(which.isFixedString()) + else if (which.isFixedString()) result_column = execute(arrays, std::move(column)); else { diff --git a/dbms/src/Functions/arraySum.cpp b/dbms/src/Functions/arraySum.cpp index c3d22bf62b..0f6db12f1e 100644 --- a/dbms/src/Functions/arraySum.cpp +++ b/dbms/src/Functions/arraySum.cpp @@ -7,6 +7,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; +} + struct ArraySumImpl { static bool needBoolean() { return false; } @@ -93,7 +98,7 @@ struct ArraySumImpl executeType(mapped, offsets, res)) return res; else - throw Exception("Unexpected column for arraySum: " + mapped->getName()); + throw Exception("Unexpected column for arraySum: " + mapped->getName(), ErrorCodes::ILLEGAL_COLUMN); } }; diff --git a/dbms/src/Functions/base64Decode.cpp b/dbms/src/Functions/base64Decode.cpp new file mode 100644 index 0000000000..e79e6bcb18 --- /dev/null +++ b/dbms/src/Functions/base64Decode.cpp @@ -0,0 +1,13 @@ +#include +#if USE_BASE64 +#include +#include + +namespace DB +{ +void registerFunctionBase64Decode(FunctionFactory & factory) +{ + factory.registerFunction>(); +} +} +#endif diff --git a/dbms/src/Functions/base64Encode.cpp b/dbms/src/Functions/base64Encode.cpp new file mode 100644 index 0000000000..9aeb3df40a --- /dev/null +++ b/dbms/src/Functions/base64Encode.cpp @@ -0,0 +1,13 @@ +#include +#if USE_BASE64 +#include +#include + +namespace DB +{ +void registerFunctionBase64Encode(FunctionFactory & factory) +{ + factory.registerFunction>(); +} +} +#endif diff --git a/dbms/src/Functions/formatDateTime.cpp b/dbms/src/Functions/formatDateTime.cpp index 874cfcfcd5..d9d76ab936 100644 --- a/dbms/src/Functions/formatDateTime.cpp +++ b/dbms/src/Functions/formatDateTime.cpp @@ -350,7 +350,7 @@ public: for (size_t i = 0; i < vec.size(); ++i) { - for(auto & instruction : instructions) + for (auto & instruction : instructions) instruction.perform(pos, vec[i], time_zone); dst_offsets[i] = pos - begin; diff --git a/dbms/src/Functions/registerFunctionsString.cpp b/dbms/src/Functions/registerFunctionsString.cpp index 9adf3a5282..5d4b05acdf 100644 --- a/dbms/src/Functions/registerFunctionsString.cpp +++ b/dbms/src/Functions/registerFunctionsString.cpp @@ -19,6 +19,11 @@ void registerFunctionSubstringUTF8(FunctionFactory &); void registerFunctionAppendTrailingCharIfAbsent(FunctionFactory &); void registerFunctionStartsWith(FunctionFactory &); void registerFunctionEndsWith(FunctionFactory &); +#if USE_BASE64 +void registerFunctionBase64Encode(FunctionFactory &); +void registerFunctionBase64Decode(FunctionFactory &); +void registerFunctionTryBase64Decode(FunctionFactory &); +#endif void 
registerFunctionsString(FunctionFactory & factory) { @@ -38,6 +43,11 @@ void registerFunctionsString(FunctionFactory & factory) registerFunctionAppendTrailingCharIfAbsent(factory); registerFunctionStartsWith(factory); registerFunctionEndsWith(factory); +#if USE_BASE64 + registerFunctionBase64Encode(factory); + registerFunctionBase64Decode(factory); + registerFunctionTryBase64Decode(factory); +#endif } } diff --git a/dbms/src/Functions/substringUTF8.cpp b/dbms/src/Functions/substringUTF8.cpp index aa28a54192..3e384bcba0 100644 --- a/dbms/src/Functions/substringUTF8.cpp +++ b/dbms/src/Functions/substringUTF8.cpp @@ -127,13 +127,13 @@ public: const ColumnPtr column_length = block.getByPosition(arguments[2]).column; if (!column_start->isColumnConst() || !column_length->isColumnConst()) - throw Exception("2nd and 3rd arguments of function " + getName() + " must be constants."); + throw Exception("2nd and 3rd arguments of function " + getName() + " must be constants.", ErrorCodes::ILLEGAL_COLUMN); Field start_field = (*block.getByPosition(arguments[1]).column)[0]; Field length_field = (*block.getByPosition(arguments[2]).column)[0]; if (start_field.getType() != Field::Types::UInt64 || length_field.getType() != Field::Types::UInt64) - throw Exception("2nd and 3rd arguments of function " + getName() + " must be non-negative and must have UInt type."); + throw Exception("2nd and 3rd arguments of function " + getName() + " must be non-negative and must have UInt type.", ErrorCodes::ILLEGAL_COLUMN); UInt64 start = start_field.get(); UInt64 length = length_field.get(); diff --git a/dbms/src/Functions/tests/CMakeLists.txt b/dbms/src/Functions/tests/CMakeLists.txt index b0fd9d1642..91bfa4bc27 100644 --- a/dbms/src/Functions/tests/CMakeLists.txt +++ b/dbms/src/Functions/tests/CMakeLists.txt @@ -1,2 +1,2 @@ add_executable (number_traits number_traits.cpp) -target_link_libraries (number_traits dbms) +target_link_libraries (number_traits PRIVATE dbms) diff --git a/dbms/src/Functions/tryBase64Decode.cpp b/dbms/src/Functions/tryBase64Decode.cpp new file mode 100644 index 0000000000..a475745265 --- /dev/null +++ b/dbms/src/Functions/tryBase64Decode.cpp @@ -0,0 +1,13 @@ +#include +#if USE_BASE64 +#include +#include + +namespace DB +{ +void registerFunctionTryBase64Decode(FunctionFactory & factory) +{ + factory.registerFunction>(); +} +} +#endif diff --git a/dbms/src/IO/HTTPCommon.cpp b/dbms/src/IO/HTTPCommon.cpp index b789e63609..c6b7e47225 100644 --- a/dbms/src/IO/HTTPCommon.cpp +++ b/dbms/src/IO/HTTPCommon.cpp @@ -1,9 +1,10 @@ #include +#include #include #include #include -#include + #if USE_POCO_NETSSL #include #include @@ -13,22 +14,150 @@ #include #include #endif + +#include +#include #include #include +#include +#include +#include #include + +namespace ProfileEvents +{ + extern const Event CreatedHTTPConnections; +} + namespace DB { - namespace ErrorCodes { extern const int RECEIVED_ERROR_FROM_REMOTE_IO_SERVER; extern const int RECEIVED_ERROR_TOO_MANY_REQUESTS; extern const int FEATURE_IS_NOT_ENABLED_AT_BUILD_TIME; + extern const int UNSUPPORTED_URI_SCHEME; } +namespace +{ + void setTimeouts(Poco::Net::HTTPClientSession & session, const ConnectionTimeouts & timeouts) + { +#if POCO_CLICKHOUSE_PATCH || POCO_VERSION >= 0x02000000 + session.setTimeout(timeouts.connection_timeout, timeouts.send_timeout, timeouts.receive_timeout); +#else + session.setTimeout(std::max({timeouts.connection_timeout, timeouts.send_timeout, timeouts.receive_timeout})); +#endif + } + + bool isHTTPS(const Poco::URI & uri) + { + if 
(uri.getScheme() == "https") + return true; + else if (uri.getScheme() == "http") + return false; + else + throw Exception("Unsupported scheme in URI '" + uri.toString() + "'", ErrorCodes::UNSUPPORTED_URI_SCHEME); + } + + HTTPSessionPtr makeHTTPSessionImpl(const std::string & host, UInt16 port, bool https, bool keep_alive) + { + HTTPSessionPtr session; + + if (https) +#if USE_POCO_NETSSL + session = std::make_shared(); +#else + throw Exception("ClickHouse was built without HTTPS support", ErrorCodes::FEATURE_IS_NOT_ENABLED_AT_BUILD_TIME); +#endif + else + session = std::make_shared(); + + ProfileEvents::increment(ProfileEvents::CreatedHTTPConnections); + + session->setHost(DNSResolver::instance().resolveHost(host).toString()); + session->setPort(port); + + /// doesn't work properly without patch +#if POCO_CLICKHOUSE_PATCH + session->setKeepAlive(keep_alive); +#endif + + return session; + } + + class SingleEndpointHTTPSessionPool : public PoolBase + { + private: + const std::string host; + const UInt16 port; + bool https; + using Base = PoolBase; + + ObjectPtr allocObject() override + { + return makeHTTPSessionImpl(host, port, https, true); + } + + public: + SingleEndpointHTTPSessionPool(const std::string & host_, UInt16 port_, bool https_, size_t max_pool_size_) + : Base(max_pool_size_, &Poco::Logger::get("HTTPSessionPool")), host(host_), port(port_), https(https_) + { + } + }; + + class HTTPSessionPool : public ext::singleton + { + private: + using Key = std::tuple; + using PoolPtr = std::shared_ptr; + using Entry = SingleEndpointHTTPSessionPool::Entry; + + friend class ext::singleton; + + struct Hasher + { + size_t operator()(const Key & k) const + { + SipHash s; + s.update(std::get<0>(k)); + s.update(std::get<1>(k)); + s.update(std::get<2>(k)); + return s.get64(); + } + }; + + std::mutex mutex; + std::unordered_map endpoints_pool; + + protected: + HTTPSessionPool() = default; + + public: + Entry getSession(const Poco::URI & uri, const ConnectionTimeouts & timeouts, size_t max_connections_per_endpoint) + { + std::unique_lock lock(mutex); + const std::string & host = uri.getHost(); + UInt16 port = uri.getPort(); + bool https = isHTTPS(uri); + auto key = std::make_tuple(host, port, https); + auto pool_ptr = endpoints_pool.find(key); + if (pool_ptr == endpoints_pool.end()) + std::tie(pool_ptr, std::ignore) = endpoints_pool.emplace( + key, std::make_shared(host, port, https, max_connections_per_endpoint)); + + auto retry_timeout = timeouts.connection_timeout.totalMicroseconds(); + auto session = pool_ptr->second->get(retry_timeout); + + setTimeouts(*session, timeouts); + return session; + } + }; +} + void setResponseDefaultHeaders(Poco::Net::HTTPServerResponse & response, unsigned keep_alive_timeout) { if (!response.getKeepAlive()) @@ -39,33 +168,24 @@ void setResponseDefaultHeaders(Poco::Net::HTTPServerResponse & response, unsigne response.set("Keep-Alive", "timeout=" + std::to_string(timeout.totalSeconds())); } -std::unique_ptr makeHTTPSession(const Poco::URI & uri, const ConnectionTimeouts & timeouts) +HTTPSessionPtr makeHTTPSession(const Poco::URI & uri, const ConnectionTimeouts & timeouts) { - bool is_ssl = static_cast(uri.getScheme() == "https"); - std::unique_ptr session; - - if (is_ssl) -#if USE_POCO_NETSSL - session = std::make_unique(); -#else - throw Exception("ClickHouse was built without HTTPS support", ErrorCodes::FEATURE_IS_NOT_ENABLED_AT_BUILD_TIME); -#endif - else - session = std::make_unique(); - - 
session->setHost(DNSResolver::instance().resolveHost(uri.getHost()).toString()); - session->setPort(uri.getPort()); - -#if POCO_CLICKHOUSE_PATCH || POCO_VERSION >= 0x02000000 - session->setTimeout(timeouts.connection_timeout, timeouts.send_timeout, timeouts.receive_timeout); -#else - session->setTimeout(std::max({timeouts.connection_timeout, timeouts.send_timeout, timeouts.receive_timeout})); -#endif + const std::string & host = uri.getHost(); + UInt16 port = uri.getPort(); + bool https = isHTTPS(uri); + auto session = makeHTTPSessionImpl(host, port, https, false); + setTimeouts(*session, timeouts); return session; } +PooledHTTPSessionPtr makePooledHTTPSession(const Poco::URI & uri, const ConnectionTimeouts & timeouts, size_t per_endpoint_pool_size) +{ + return HTTPSessionPool::instance().getSession(uri, timeouts, per_endpoint_pool_size); +} + + std::istream * receiveResponse( Poco::Net::HTTPClientSession & session, const Poco::Net::HTTPRequest & request, Poco::Net::HTTPResponse & response) { diff --git a/dbms/src/IO/HTTPCommon.h b/dbms/src/IO/HTTPCommon.h index 011f718e20..6dc669c248 100644 --- a/dbms/src/IO/HTTPCommon.h +++ b/dbms/src/IO/HTTPCommon.h @@ -1,43 +1,60 @@ #pragma once -#include -#include #include +#include +#include #include #include #include #include +#include #include namespace Poco { - namespace Net - { - class HTTPServerResponse; - } +namespace Net +{ + class HTTPServerResponse; +} } namespace DB { +constexpr int HTTP_TOO_MANY_REQUESTS = 429; -const int HTTP_TOO_MANY_REQUESTS = 429; +class SingleEndpointHTTPSessionPool : public PoolBase +{ +private: + const std::string host; + const UInt16 port; + const bool https; + using Base = PoolBase; + + ObjectPtr allocObject() override; + +public: + SingleEndpointHTTPSessionPool(const std::string & host_, UInt16 port_, bool https_, size_t max_pool_size_); +}; +using PooledHTTPSessionPtr = SingleEndpointHTTPSessionPool::Entry; +using HTTPSessionPtr = std::shared_ptr; void setResponseDefaultHeaders(Poco::Net::HTTPServerResponse & response, unsigned keep_alive_timeout); - /// Create session object to perform requests and set required parameters. -std::unique_ptr makeHTTPSession(const Poco::URI & uri, const ConnectionTimeouts & timeouts); +HTTPSessionPtr makeHTTPSession(const Poco::URI & uri, const ConnectionTimeouts & timeouts); +/// As previous method creates session, but tooks it from pool +PooledHTTPSessionPtr makePooledHTTPSession(const Poco::URI & uri, const ConnectionTimeouts & timeouts, size_t per_endpoint_pool_size); /** Used to receive response (response headers and possibly body) * after sending data (request headers and possibly body). * Throws exception in case of non HTTP_OK (200) response code. * Returned istream lives in 'session' object. 
*/ -std::istream * receiveResponse(Poco::Net::HTTPClientSession & session, const Poco::Net::HTTPRequest & request, Poco::Net::HTTPResponse & response); - +std::istream * receiveResponse( + Poco::Net::HTTPClientSession & session, const Poco::Net::HTTPRequest & request, Poco::Net::HTTPResponse & response); } diff --git a/dbms/src/IO/ReadBufferFromPocoSocket.cpp b/dbms/src/IO/ReadBufferFromPocoSocket.cpp index 9dc21c1971..e44b8c99c5 100644 --- a/dbms/src/IO/ReadBufferFromPocoSocket.cpp +++ b/dbms/src/IO/ReadBufferFromPocoSocket.cpp @@ -36,7 +36,7 @@ bool ReadBufferFromPocoSocket::nextImpl() } catch (const Poco::Net::NetException & e) { - throw NetException(e.displayText(), "while reading from socket (" + peer_address.toString() + ")", ErrorCodes::NETWORK_ERROR); + throw NetException(e.displayText() + ", while reading from socket (" + peer_address.toString() + ")", ErrorCodes::NETWORK_ERROR); } catch (const Poco::TimeoutException &) { @@ -44,7 +44,7 @@ bool ReadBufferFromPocoSocket::nextImpl() } catch (const Poco::IOException & e) { - throw NetException(e.displayText(), "while reading from socket (" + peer_address.toString() + ")", ErrorCodes::NETWORK_ERROR); + throw NetException(e.displayText() + ", while reading from socket (" + peer_address.toString() + ")", ErrorCodes::NETWORK_ERROR); } if (bytes_read < 0) diff --git a/dbms/src/IO/ReadHelpers.h b/dbms/src/IO/ReadHelpers.h index bb012585a7..dcf866848c 100644 --- a/dbms/src/IO/ReadHelpers.h +++ b/dbms/src/IO/ReadHelpers.h @@ -58,7 +58,7 @@ namespace ErrorCodes inline char parseEscapeSequence(char c) { - switch(c) + switch (c) { case 'a': return '\a'; diff --git a/dbms/src/IO/ReadWriteBufferFromHTTP.cpp b/dbms/src/IO/ReadWriteBufferFromHTTP.cpp index 467f040ef3..4d046bfe2c 100644 --- a/dbms/src/IO/ReadWriteBufferFromHTTP.cpp +++ b/dbms/src/IO/ReadWriteBufferFromHTTP.cpp @@ -1,65 +1 @@ #include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace DB -{ - - -ReadWriteBufferFromHTTP::ReadWriteBufferFromHTTP(Poco::URI uri, - const std::string & method_, - OutStreamCallback out_stream_callback, - const ConnectionTimeouts & timeouts, - const Poco::Net::HTTPBasicCredentials & credentials, - size_t buffer_size_) - : ReadBuffer(nullptr, 0), - uri{uri}, - method{!method_.empty() ? method_ : out_stream_callback ? Poco::Net::HTTPRequest::HTTP_POST : Poco::Net::HTTPRequest::HTTP_GET}, - session{makeHTTPSession(uri, timeouts)} -{ - // With empty path poco will send "POST HTTP/1.1" its bug. 
- if (uri.getPath().empty()) - uri.setPath("/"); - - Poco::Net::HTTPRequest request(method, uri.getPathAndQuery(), Poco::Net::HTTPRequest::HTTP_1_1); - request.setHost(uri.getHost()); // use original, not resolved host name in header - - if (out_stream_callback) - request.setChunkedTransferEncoding(true); - - if (!credentials.getUsername().empty()) - credentials.authenticate(request); - - Poco::Net::HTTPResponse response; - - LOG_TRACE((&Logger::get("ReadWriteBufferFromHTTP")), "Sending request to " << uri.toString()); - - auto & stream_out = session->sendRequest(request); - - if (out_stream_callback) - out_stream_callback(stream_out); - - istr = receiveResponse(*session, request, response); - - impl = std::make_unique(*istr, buffer_size_); -} - - -bool ReadWriteBufferFromHTTP::nextImpl() -{ - if (!impl->next()) - return false; - internal_buffer = impl->buffer(); - working_buffer = internal_buffer; - return true; -} - -} diff --git a/dbms/src/IO/ReadWriteBufferFromHTTP.h b/dbms/src/IO/ReadWriteBufferFromHTTP.h index 363c9becdd..215aa29a3e 100644 --- a/dbms/src/IO/ReadWriteBufferFromHTTP.h +++ b/dbms/src/IO/ReadWriteBufferFromHTTP.h @@ -1,42 +1,132 @@ #pragma once #include +#include +#include +#include +#include +#include #include #include +#include +#include #include -#include -#include +#include +#include +#include +#include + #define DEFAULT_HTTP_READ_BUFFER_TIMEOUT 1800 #define DEFAULT_HTTP_READ_BUFFER_CONNECTION_TIMEOUT 1 namespace DB { - /** Perform HTTP POST request and provide response to read. */ -class ReadWriteBufferFromHTTP : public ReadBuffer -{ -private: - Poco::URI uri; - std::string method; - std::unique_ptr session; - std::istream * istr; /// owned by session - std::unique_ptr impl; +namespace detail +{ + template + class ReadWriteBufferFromHTTPBase : public ReadBuffer + { + protected: + Poco::URI uri; + std::string method; + + SessionPtr session; + std::istream * istr; /// owned by session + std::unique_ptr impl; + + public: + using OutStreamCallback = std::function; + + explicit ReadWriteBufferFromHTTPBase(SessionPtr session_, + Poco::URI uri, + const std::string & method = {}, + OutStreamCallback out_stream_callback = {}, + const Poco::Net::HTTPBasicCredentials & credentials = {}, + size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE) + : ReadBuffer(nullptr, 0) + , uri {uri} + , method {!method.empty() ? method : out_stream_callback ? Poco::Net::HTTPRequest::HTTP_POST : Poco::Net::HTTPRequest::HTTP_GET} + , session {session_} + { + // With empty path poco will send "POST HTTP/1.1" its bug. 
+ if (uri.getPath().empty()) + uri.setPath("/"); + + Poco::Net::HTTPRequest request(method, uri.getPathAndQuery(), Poco::Net::HTTPRequest::HTTP_1_1); + request.setHost(uri.getHost()); // use original, not resolved host name in header + + if (out_stream_callback) + request.setChunkedTransferEncoding(true); + + if (!credentials.getUsername().empty()) + credentials.authenticate(request); + + Poco::Net::HTTPResponse response; + + LOG_TRACE((&Logger::get("ReadWriteBufferFromHTTP")), "Sending request to " << uri.toString()); + + auto & stream_out = session->sendRequest(request); + + if (out_stream_callback) + out_stream_callback(stream_out); + + istr = receiveResponse(*session, request, response); + + impl = std::make_unique(*istr, buffer_size_); + } + + + bool nextImpl() override + { + if (!impl->next()) + return false; + internal_buffer = impl->buffer(); + working_buffer = internal_buffer; + return true; + } + }; +} + +class ReadWriteBufferFromHTTP : public detail::ReadWriteBufferFromHTTPBase +{ + using Parent = detail::ReadWriteBufferFromHTTPBase; public: - using OutStreamCallback = std::function; - - explicit ReadWriteBufferFromHTTP( - Poco::URI uri, + explicit ReadWriteBufferFromHTTP(Poco::URI uri, const std::string & method = {}, OutStreamCallback out_stream_callback = {}, const ConnectionTimeouts & timeouts = {}, const Poco::Net::HTTPBasicCredentials & credentials = {}, - size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE); + size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE) + : Parent(makeHTTPSession(uri, timeouts), uri, method, out_stream_callback, credentials, buffer_size_) + { + } +}; +class PooledReadWriteBufferFromHTTP : public detail::ReadWriteBufferFromHTTPBase +{ + using Parent = detail::ReadWriteBufferFromHTTPBase; - bool nextImpl() override; +public: + explicit PooledReadWriteBufferFromHTTP(Poco::URI uri, + const std::string & method = {}, + OutStreamCallback out_stream_callback = {}, + const ConnectionTimeouts & timeouts = {}, + const Poco::Net::HTTPBasicCredentials & credentials = {}, + size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE, + size_t max_connections_per_endpoint = DEFAULT_COUNT_OF_HTTP_CONNECTIONS_PER_ENDPOINT) + : Parent(makePooledHTTPSession(uri, timeouts, max_connections_per_endpoint), + uri, + method, + out_stream_callback, + credentials, + buffer_size_) + { + } }; + } diff --git a/dbms/src/IO/WriteBufferFromHTTP.cpp b/dbms/src/IO/WriteBufferFromHTTP.cpp index 51472de85e..c74c74a0bd 100644 --- a/dbms/src/IO/WriteBufferFromHTTP.cpp +++ b/dbms/src/IO/WriteBufferFromHTTP.cpp @@ -1,6 +1,5 @@ #include -#include #include diff --git a/dbms/src/IO/WriteBufferFromHTTP.h b/dbms/src/IO/WriteBufferFromHTTP.h index 12eed48021..c68b8f88d3 100644 --- a/dbms/src/IO/WriteBufferFromHTTP.h +++ b/dbms/src/IO/WriteBufferFromHTTP.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -17,7 +18,7 @@ namespace DB class WriteBufferFromHTTP : public WriteBufferFromOStream { private: - std::unique_ptr session; + HTTPSessionPtr session; Poco::Net::HTTPRequest request; Poco::Net::HTTPResponse response; diff --git a/dbms/src/IO/WriteBufferFromPocoSocket.cpp b/dbms/src/IO/WriteBufferFromPocoSocket.cpp index 0712b2c060..09fca48c6f 100644 --- a/dbms/src/IO/WriteBufferFromPocoSocket.cpp +++ b/dbms/src/IO/WriteBufferFromPocoSocket.cpp @@ -43,7 +43,7 @@ void WriteBufferFromPocoSocket::nextImpl() } catch (const Poco::Net::NetException & e) { - throw NetException(e.displayText() + " while writing to socket (" + peer_address.toString() + ")", ErrorCodes::NETWORK_ERROR); + throw 
NetException(e.displayText() + ", while writing to socket (" + peer_address.toString() + ")", ErrorCodes::NETWORK_ERROR); } catch (const Poco::TimeoutException &) { @@ -51,7 +51,7 @@ void WriteBufferFromPocoSocket::nextImpl() } catch (const Poco::IOException & e) { - throw NetException(e.displayText(), " while reading from socket (" + peer_address.toString() + ")", ErrorCodes::NETWORK_ERROR); + throw NetException(e.displayText() + ", while reading from socket (" + peer_address.toString() + ")", ErrorCodes::NETWORK_ERROR); } if (res < 0) diff --git a/dbms/src/IO/WriteHelpers.cpp b/dbms/src/IO/WriteHelpers.cpp index ba0e106b42..5dc2358c4c 100644 --- a/dbms/src/IO/WriteHelpers.cpp +++ b/dbms/src/IO/WriteHelpers.cpp @@ -64,7 +64,7 @@ void writeException(const Exception & e, WriteBuffer & buf, bool with_stack_trac writeBinary(has_nested, buf); if (has_nested) - writeException(Exception(*e.nested()), buf, with_stack_trace); + writeException(Exception(Exception::CreateFromPoco, *e.nested()), buf, with_stack_trace); } } diff --git a/dbms/src/IO/tests/CMakeLists.txt b/dbms/src/IO/tests/CMakeLists.txt index f75ad8523f..75398ff980 100644 --- a/dbms/src/IO/tests/CMakeLists.txt +++ b/dbms/src/IO/tests/CMakeLists.txt @@ -1,84 +1,84 @@ add_executable (read_buffer read_buffer.cpp) -target_link_libraries (read_buffer clickhouse_common_io) +target_link_libraries (read_buffer PRIVATE clickhouse_common_io) add_executable (read_buffer_perf read_buffer_perf.cpp) -target_link_libraries (read_buffer_perf clickhouse_common_io) +target_link_libraries (read_buffer_perf PRIVATE clickhouse_common_io) add_executable (read_float_perf read_float_perf.cpp) -target_link_libraries (read_float_perf clickhouse_common_io) +target_link_libraries (read_float_perf PRIVATE clickhouse_common_io) add_executable (write_buffer write_buffer.cpp) -target_link_libraries (write_buffer clickhouse_common_io) +target_link_libraries (write_buffer PRIVATE clickhouse_common_io) add_executable (write_buffer_perf write_buffer_perf.cpp) -target_link_libraries (write_buffer_perf clickhouse_common_io) +target_link_libraries (write_buffer_perf PRIVATE clickhouse_common_io) add_executable (valid_utf8_perf valid_utf8_perf.cpp) -target_link_libraries (valid_utf8_perf clickhouse_common_io) +target_link_libraries (valid_utf8_perf PRIVATE clickhouse_common_io) add_executable (valid_utf8 valid_utf8.cpp) -target_link_libraries (valid_utf8 clickhouse_common_io) +target_link_libraries (valid_utf8 PRIVATE clickhouse_common_io) add_executable (compressed_buffer compressed_buffer.cpp) -target_link_libraries (compressed_buffer clickhouse_common_io) +target_link_libraries (compressed_buffer PRIVATE clickhouse_common_io) add_executable (var_uint var_uint.cpp) -target_link_libraries (var_uint clickhouse_common_io) +target_link_libraries (var_uint PRIVATE clickhouse_common_io) add_executable (read_escaped_string read_escaped_string.cpp) -target_link_libraries (read_escaped_string clickhouse_common_io) +target_link_libraries (read_escaped_string PRIVATE clickhouse_common_io) add_executable (async_write async_write.cpp) -target_link_libraries (async_write clickhouse_common_io) +target_link_libraries (async_write PRIVATE clickhouse_common_io) add_executable (parse_int_perf parse_int_perf.cpp) -target_link_libraries (parse_int_perf clickhouse_common_io) +target_link_libraries (parse_int_perf PRIVATE clickhouse_common_io) add_executable (parse_int_perf2 parse_int_perf2.cpp) -target_link_libraries (parse_int_perf2 clickhouse_common_io) +target_link_libraries 
(parse_int_perf2 PRIVATE clickhouse_common_io) add_executable (read_write_int read_write_int.cpp) -target_link_libraries (read_write_int clickhouse_common_io) +target_link_libraries (read_write_int PRIVATE clickhouse_common_io) add_executable (mempbrk mempbrk.cpp) -target_link_libraries (mempbrk clickhouse_common_io) +target_link_libraries (mempbrk PRIVATE clickhouse_common_io) add_executable (cached_compressed_read_buffer cached_compressed_read_buffer.cpp) -target_link_libraries (cached_compressed_read_buffer clickhouse_common_io) +target_link_libraries (cached_compressed_read_buffer PRIVATE clickhouse_common_io) add_executable (o_direct_and_dirty_pages o_direct_and_dirty_pages.cpp) -target_link_libraries (o_direct_and_dirty_pages clickhouse_common_io) +target_link_libraries (o_direct_and_dirty_pages PRIVATE clickhouse_common_io) add_executable (hashing_write_buffer hashing_write_buffer.cpp) -target_link_libraries (hashing_write_buffer clickhouse_common_io) +target_link_libraries (hashing_write_buffer PRIVATE clickhouse_common_io) add_check(hashing_write_buffer) add_executable (hashing_read_buffer hashing_read_buffer.cpp) -target_link_libraries (hashing_read_buffer clickhouse_common_io) +target_link_libraries (hashing_read_buffer PRIVATE clickhouse_common_io) add_check (hashing_read_buffer) add_executable (io_operators operators.cpp) -target_link_libraries (io_operators clickhouse_common_io) +target_link_libraries (io_operators PRIVATE clickhouse_common_io) if (OS_LINUX) add_executable(write_buffer_aio write_buffer_aio.cpp) - target_link_libraries (write_buffer_aio clickhouse_common_io ${Boost_FILESYSTEM_LIBRARY}) + target_link_libraries (write_buffer_aio PRIVATE clickhouse_common_io ${Boost_FILESYSTEM_LIBRARY}) add_executable(read_buffer_aio read_buffer_aio.cpp) - target_link_libraries (read_buffer_aio clickhouse_common_io ${Boost_FILESYSTEM_LIBRARY}) + target_link_libraries (read_buffer_aio PRIVATE clickhouse_common_io ${Boost_FILESYSTEM_LIBRARY}) endif () add_executable (zlib_buffers zlib_buffers.cpp) -target_link_libraries (zlib_buffers clickhouse_common_io) +target_link_libraries (zlib_buffers PRIVATE clickhouse_common_io) add_executable (limit_read_buffer limit_read_buffer.cpp) -target_link_libraries (limit_read_buffer clickhouse_common_io) +target_link_libraries (limit_read_buffer PRIVATE clickhouse_common_io) add_executable (limit_read_buffer2 limit_read_buffer2.cpp) -target_link_libraries (limit_read_buffer2 clickhouse_common_io) +target_link_libraries (limit_read_buffer2 PRIVATE clickhouse_common_io) add_executable (parse_date_time_best_effort parse_date_time_best_effort.cpp) -target_link_libraries (parse_date_time_best_effort clickhouse_common_io) +target_link_libraries (parse_date_time_best_effort PRIVATE clickhouse_common_io) add_executable (zlib_ng_bug zlib_ng_bug.cpp) -target_link_libraries (zlib_ng_bug ${Poco_Foundation_LIBRARY}) +target_link_libraries (zlib_ng_bug PRIVATE ${Poco_Foundation_LIBRARY}) diff --git a/dbms/src/IO/tests/compressed_buffer.cpp b/dbms/src/IO/tests/compressed_buffer.cpp index 22e5017f44..bcb8d7ae9c 100644 --- a/dbms/src/IO/tests/compressed_buffer.cpp +++ b/dbms/src/IO/tests/compressed_buffer.cpp @@ -55,7 +55,7 @@ int main(int, char **) { std::stringstream s; s << "Failed!, read: " << x << ", expected: " << i; - throw DB::Exception(s.str()); + throw DB::Exception(s.str(), 0); } } stopwatch.stop(); diff --git a/dbms/src/IO/tests/limit_read_buffer2.cpp b/dbms/src/IO/tests/limit_read_buffer2.cpp index f9a9218174..826fb048a0 100644 --- 
a/dbms/src/IO/tests/limit_read_buffer2.cpp +++ b/dbms/src/IO/tests/limit_read_buffer2.cpp @@ -6,6 +6,15 @@ #include +namespace DB +{ + namespace ErrorCodes + { + extern const int LOGICAL_ERROR; + } +} + + int main(int, char **) try { @@ -31,18 +40,18 @@ try if (limit_in.count() != 1) { s << "Failed!, incorrect count(): " << limit_in.count(); - throw Exception(s.str()); + throw Exception(s.str(), ErrorCodes::LOGICAL_ERROR); } if (in.count() != limit_in.count()) { s << "Failed!, incorrect underlying buffer's count(): " << in.count(); - throw Exception(s.str()); + throw Exception(s.str(), ErrorCodes::LOGICAL_ERROR); } if (src != dst) { s << "Failed!, incorrect destination value, read: " << dst << ", expected: " << src; - throw Exception(s.str()); + throw Exception(s.str(), ErrorCodes::LOGICAL_ERROR); } } { @@ -65,20 +74,20 @@ try if (in.count() != 2) { s << "Failed!, Incorrect underlying buffer's count: " << in.count() << ", expected: " << 2; - throw Exception(s.str()); + throw Exception(s.str(), ErrorCodes::LOGICAL_ERROR); } if (limit_in.count() != 1) { s << "Failed!, Incorrect count: " << limit_in.count() << ", expected: " << 1; - throw Exception(s.str()); + throw Exception(s.str(), ErrorCodes::LOGICAL_ERROR); } } if (dst != "b") { s << "Failed!, Incorrect destination value: " << dst << ", expected 'b'"; - throw Exception(dst); + throw Exception(dst, ErrorCodes::LOGICAL_ERROR); } char y; @@ -86,14 +95,14 @@ try if (y != 'c') { s << "Failed!, Read incorrect value from underlying buffer: " << y << ", expected 'c'"; - throw Exception(s.str()); + throw Exception(s.str(), ErrorCodes::LOGICAL_ERROR); } while (!in.eof()) in.ignore(); if (in.count() != 3) { s << "Failed!, Incorrect final count from underlying buffer: " << in.count() << ", expected: 3"; - throw Exception(s.str()); + throw Exception(s.str(), ErrorCodes::LOGICAL_ERROR); } } @@ -110,14 +119,14 @@ try if (limit_in.count() != 1) { s << "Failed!, Incorrect count: " << limit_in.count() << ", expected: " << 1; - throw Exception(s.str()); + throw Exception(s.str(), ErrorCodes::LOGICAL_ERROR); } } if (in.count() != 1) { s << "Failed!, Incorrect final count from underlying buffer: " << in.count() << ", expected: 1"; - throw Exception(s.str()); + throw Exception(s.str(), ErrorCodes::LOGICAL_ERROR); } } diff --git a/dbms/src/IO/tests/zlib_buffers.cpp b/dbms/src/IO/tests/zlib_buffers.cpp index af4b003766..2c55509deb 100644 --- a/dbms/src/IO/tests/zlib_buffers.cpp +++ b/dbms/src/IO/tests/zlib_buffers.cpp @@ -51,7 +51,7 @@ try inflating_buf.ignore(); if (x != i) - throw DB::Exception("Failed!, read: " + std::to_string(x) + ", expected: " + std::to_string(i)); + throw DB::Exception("Failed!, read: " + std::to_string(x) + ", expected: " + std::to_string(i), 0); } stopwatch.stop(); std::cout << "Reading done. Elapsed: " << stopwatch.elapsedSeconds() << " s." 
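[Editorial illustration, not part of the patch] The HTTPCommon and ReadWriteBufferFromHTTP changes above add per-endpoint HTTP session pooling. The following is a minimal usage sketch of the new PooledReadWriteBufferFromHTTP class; only the class, its constructor parameters and ConnectionTimeouts are taken from the diff, while the include paths, readStringUntilEOF helper and DBMS_DEFAULT_BUFFER_SIZE macro are assumed to come from the surrounding codebase.

// Illustrative sketch only; everything not shown in the diff above is an assumption.
#include <IO/ReadWriteBufferFromHTTP.h>
#include <IO/ReadHelpers.h>
#include <Poco/Net/HTTPRequest.h>
#include <Poco/URI.h>

std::string fetchBody(const std::string & url)
{
    Poco::URI uri(url);

    /// GET through the per-endpoint keep-alive pool (at most 4 cached sessions for this host:port).
    DB::PooledReadWriteBufferFromHTTP in(
        uri,
        Poco::Net::HTTPRequest::HTTP_GET,
        {},                        /// no request body callback
        DB::ConnectionTimeouts{},  /// default timeouts
        {},                        /// no credentials
        DBMS_DEFAULT_BUFFER_SIZE,
        /* max_connections_per_endpoint = */ 4);

    std::string body;
    DB::readStringUntilEOF(body, in);  /// drain the response into a string
    return body;
}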
diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index 2019ad9f04..bfa8d51c9c 100644 --- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -1352,7 +1352,8 @@ BlocksList Aggregator::prepareBlocksAndFillTwoLevelImpl( { auto converter = [&](size_t bucket, ThreadGroupStatusPtr thread_group) { - CurrentThread::attachToIfDetached(thread_group); + if (thread_group) + CurrentThread::attachToIfDetached(thread_group); return convertOneBucketToBlock(data_variants, method, final, bucket); }; @@ -1805,7 +1806,8 @@ private: try { setThreadName("MergingAggregtd"); - CurrentThread::attachToIfDetached(thread_group); + if (thread_group) + CurrentThread::attachToIfDetached(thread_group); CurrentMetrics::Increment metric_increment{CurrentMetrics::QueryThread}; /// TODO: add no_more_keys support maybe @@ -2127,7 +2129,8 @@ void Aggregator::mergeStream(const BlockInputStreamPtr & stream, AggregatedDataV auto merge_bucket = [&bucket_to_blocks, &result, this](Int32 bucket, Arena * aggregates_pool, ThreadGroupStatusPtr thread_group) { - CurrentThread::attachToIfDetached(thread_group); + if (thread_group) + CurrentThread::attachToIfDetached(thread_group); for (Block & block : bucket_to_blocks[bucket]) { diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 6e7051ba38..505d7b5918 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -50,6 +50,7 @@ #include #include +#include #include @@ -199,6 +200,9 @@ struct ContextShared pcg64 rng{randomSeed()}; + /// Vector of xdbc-bridge commands; they will be killed when the Context is destroyed + std::vector<std::unique_ptr<ShellCommand>> bridge_commands; + Context::ConfigReloadCallback config_reload_callback; ContextShared(std::shared_ptr<IRuntimeComponentsFactory> runtime_components_factory_) @@ -1844,6 +1848,13 @@ void Context::dropCompiledExpressionCache() const #endif + +void Context::addXDBCBridgeCommand(std::unique_ptr<ShellCommand> cmd) +{ + auto lock = getLock(); + shared->bridge_commands.emplace_back(std::move(cmd)); +} + std::shared_ptr<ActionLocksManager> Context::getActionLocksManager() { auto lock = getLock(); diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h index ad6c12905d..af189b8abe 100644 --- a/dbms/src/Interpreters/Context.h +++ b/dbms/src/Interpreters/Context.h @@ -79,6 +79,7 @@ struct SystemLogs; using SystemLogsPtr = std::shared_ptr<SystemLogs>; class ActionLocksManager; using ActionLocksManagerPtr = std::shared_ptr<ActionLocksManager>; +class ShellCommand; #if USE_EMBEDDED_COMPILER @@ -445,6 +446,9 @@ public: void dropCompiledExpressionCache() const; #endif + /// Add started bridge command. It will be killed after context destruction + void addXDBCBridgeCommand(std::unique_ptr<ShellCommand> cmd); + private: /** Check if the current client has access to the specified database. * If access is denied, throw an exception. 
diff --git a/dbms/src/Interpreters/ExpressionActions.cpp b/dbms/src/Interpreters/ExpressionActions.cpp index a2c0a9ceb4..11a7ecfb1a 100644 --- a/dbms/src/Interpreters/ExpressionActions.cpp +++ b/dbms/src/Interpreters/ExpressionActions.cpp @@ -1150,7 +1150,7 @@ UInt128 ExpressionAction::ActionHash::operator()(const ExpressionAction & action SipHash hash; hash.update(action.type); hash.update(action.is_function_compiled); - switch(action.type) + switch (action.type) { case ADD_COLUMN: hash.update(action.result_name); diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 55e04b7a88..79117744f2 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -1087,7 +1087,7 @@ void ExpressionAnalyzer::collectUsedColumns() } joined_block_actions = analyzedJoin().createJoinedBlockActions( - columns_added_by_join, select_query, context, required_columns_from_joined_table); + columns_added_by_join, select_query, context, required_columns_from_joined_table); /// Some columns from right join key may be used in query. This columns will be appended to block during join. for (const auto & right_key_name : analyzedJoin().key_names_right) @@ -1136,7 +1136,9 @@ void ExpressionAnalyzer::collectUsedColumns() } if (!unknown_required_source_columns.empty()) - throw Exception("Unknown identifier: " + *unknown_required_source_columns.begin(), ErrorCodes::UNKNOWN_IDENTIFIER); + throw Exception("Unknown identifier: " + *unknown_required_source_columns.begin() + + (select_query && !select_query->tables ? ". Note that there is no tables (FROM clause) in your query" : ""), + ErrorCodes::UNKNOWN_IDENTIFIER); } diff --git a/dbms/src/Interpreters/InterpreterAlterQuery.cpp b/dbms/src/Interpreters/InterpreterAlterQuery.cpp index 31eedff6d1..c3bab56edc 100644 --- a/dbms/src/Interpreters/InterpreterAlterQuery.cpp +++ b/dbms/src/Interpreters/InterpreterAlterQuery.cpp @@ -54,41 +54,10 @@ BlockIO InterpreterAlterQuery::execute() table->mutate(mutation_commands, context); } - partition_commands.validate(*table); - for (const PartitionCommand & command : partition_commands) + if (!partition_commands.empty()) { - switch (command.type) - { - case PartitionCommand::DROP_PARTITION: - table->checkPartitionCanBeDropped(command.partition); - table->dropPartition(query_ptr, command.partition, command.detach, context); - break; - - case PartitionCommand::ATTACH_PARTITION: - table->attachPartition(command.partition, command.part, context); - break; - - case PartitionCommand::REPLACE_PARTITION: - { - table->checkPartitionCanBeDropped(command.partition); - String from_database = command.from_database.empty() ? 
context.getCurrentDatabase() : command.from_database; - auto from_storage = context.getTable(from_database, command.from_table); - table->replacePartitionFrom(from_storage, command.partition, command.replace, context); - } - break; - - case PartitionCommand::FETCH_PARTITION: - table->fetchPartition(command.partition, command.from_zookeeper_path, context); - break; - - case PartitionCommand::FREEZE_PARTITION: - table->freezePartition(command.partition, command.with_name, context); - break; - - case PartitionCommand::CLEAR_COLUMN: - table->clearColumnInPartition(command.partition, command.column_name, context); - break; - } + partition_commands.validate(*table); + table->partition(query_ptr, partition_commands, context); } if (!alter_commands.empty()) diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.cpp b/dbms/src/Interpreters/InterpreterCreateQuery.cpp index af2b51e58e..eac6d2d776 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.cpp +++ b/dbms/src/Interpreters/InterpreterCreateQuery.cpp @@ -6,6 +6,7 @@ #include #include +#include #include #include @@ -169,13 +170,15 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create) using ColumnsAndDefaults = std::pair; +using ParsedColumns = std::tuple; /// AST to the list of columns with types. Columns of Nested type are expanded into a list of real columns. -static ColumnsAndDefaults parseColumns(const ASTExpressionList & column_list_ast, const Context & context) +static ParsedColumns parseColumns(const ASTExpressionList & column_list_ast, const Context & context) { /// list of table columns in correct order NamesAndTypesList columns{}; ColumnDefaults defaults{}; + ColumnComments comments{}; /// Columns requiring type-deduction or default_expression type-check std::vector> defaulted_columns{}; @@ -219,6 +222,12 @@ static ColumnsAndDefaults parseColumns(const ASTExpressionList & column_list_ast else default_expr_list->children.emplace_back(setAlias(col_decl.default_expression->clone(), col_decl.name)); } + + if (col_decl.comment) + { + if (auto comment_str = typeid_cast(*col_decl.comment).value.get(); !comment_str.empty()) + comments.emplace(col_decl.name, comment_str); + } } /// set missing types and wrap default_expression's in a conversion-function if necessary @@ -269,7 +278,7 @@ static ColumnsAndDefaults parseColumns(const ASTExpressionList & column_list_ast } } - return {Nested::flatten(columns), defaults}; + return {Nested::flatten(columns), defaults, comments}; } @@ -337,11 +346,17 @@ ASTPtr InterpreterCreateQuery::formatColumns(const ColumnsDescription & columns) column_declaration->type = parseQuery(storage_p, pos, end, "data type", 0); column_declaration->type->owned_string = type_name; - const auto it = columns.defaults.find(column.name); - if (it != std::end(columns.defaults)) + const auto defaults_it = columns.defaults.find(column.name); + if (defaults_it != std::end(columns.defaults)) { - column_declaration->default_specifier = toString(it->second.kind); - column_declaration->default_expression = it->second.expression->clone(); + column_declaration->default_specifier = toString(defaults_it->second.kind); + column_declaration->default_expression = defaults_it->second.expression->clone(); + } + + const auto comments_it = columns.comments.find(column.name); + if (comments_it != std::end(columns.comments)) + { + column_declaration->comment = std::make_shared(Field(comments_it->second)); } columns_list->children.push_back(column_declaration_ptr); @@ -350,16 +365,17 @@ ASTPtr 
InterpreterCreateQuery::formatColumns(const ColumnsDescription & columns) return columns_list; } - ColumnsDescription InterpreterCreateQuery::getColumnsDescription(const ASTExpressionList & columns, const Context & context) { ColumnsDescription res; - auto && columns_and_defaults = parseColumns(columns, context); + auto && parsed_columns = parseColumns(columns, context); + auto columns_and_defaults = std::make_pair(std::move(std::get<0>(parsed_columns)), std::move(std::get<1>(parsed_columns))); res.materialized = removeAndReturnColumns(columns_and_defaults, ColumnDefaultKind::Materialized); res.aliases = removeAndReturnColumns(columns_and_defaults, ColumnDefaultKind::Alias); res.ordinary = std::move(columns_and_defaults.first); res.defaults = std::move(columns_and_defaults.second); + res.comments = std::move(std::get<2>(parsed_columns)); if (res.ordinary.size() + res.materialized.size() == 0) throw Exception{"Cannot CREATE table without physical columns", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED}; diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.h b/dbms/src/Interpreters/InterpreterCreateQuery.h index 2f18676486..e450ae0728 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.h +++ b/dbms/src/Interpreters/InterpreterCreateQuery.h @@ -44,7 +44,7 @@ public: internal = internal_; } - /// Obtain information about columns, their types and default values, for case when columns in CREATE query is specified explicitly. + /// Obtain information about columns, their types, default values and column comments, for case when columns in CREATE query is specified explicitly. static ColumnsDescription getColumnsDescription(const ASTExpressionList & columns, const Context & context); /// Check that column types are allowed for usage in table according to settings. 
static void checkSupportedTypes(const ColumnsDescription & columns, const Context & context); diff --git a/dbms/src/Interpreters/InterpreterDescribeQuery.cpp b/dbms/src/Interpreters/InterpreterDescribeQuery.cpp index 536c554e99..db33383b76 100644 --- a/dbms/src/Interpreters/InterpreterDescribeQuery.cpp +++ b/dbms/src/Interpreters/InterpreterDescribeQuery.cpp @@ -45,6 +45,9 @@ Block InterpreterDescribeQuery::getSampleBlock() col.name = "default_expression"; block.insert(col); + col.name = "comment_expression"; + block.insert(col); + return block; } @@ -55,6 +58,7 @@ BlockInputStreamPtr InterpreterDescribeQuery::executeImpl() NamesAndTypesList columns; ColumnDefaults column_defaults; + ColumnComments column_comments; StoragePtr table; auto table_expression = typeid_cast(ast.table_expression.get()); @@ -101,6 +105,7 @@ BlockInputStreamPtr InterpreterDescribeQuery::executeImpl() auto table_lock = table->lockStructure(false, __PRETTY_FUNCTION__); columns = table->getColumns().getAll(); column_defaults = table->getColumns().defaults; + column_comments = table->getColumns().comments; } Block sample_block = getSampleBlock(); @@ -111,16 +116,26 @@ BlockInputStreamPtr InterpreterDescribeQuery::executeImpl() res_columns[0]->insert(column.name); res_columns[1]->insert(column.type->getName()); - const auto it = column_defaults.find(column.name); - if (it == std::end(column_defaults)) + const auto defaults_it = column_defaults.find(column.name); + if (defaults_it == std::end(column_defaults)) { res_columns[2]->insertDefault(); res_columns[3]->insertDefault(); } else { - res_columns[2]->insert(toString(it->second.kind)); - res_columns[3]->insert(queryToString(it->second.expression)); + res_columns[2]->insert(toString(defaults_it->second.kind)); + res_columns[3]->insert(queryToString(defaults_it->second.expression)); + } + + const auto comments_it = column_comments.find(column.name); + if (comments_it == std::end(column_comments)) + { + res_columns[4]->insertDefault(); + } + else + { + res_columns[4]->insert(comments_it->second); } } diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index 5ee3b1cc5b..2a139edce9 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -590,7 +590,7 @@ void InterpreterSelectQuery::executeImpl(Pipeline & pipeline, const BlockInputSt if (query.group_by_with_rollup) executeRollupOrCube(pipeline, Modificator::ROLLUP); - else if(query.group_by_with_cube) + else if (query.group_by_with_cube) executeRollupOrCube(pipeline, Modificator::CUBE); } else if (expressions.has_having) diff --git a/dbms/src/Interpreters/PartLog.cpp b/dbms/src/Interpreters/PartLog.cpp index d32fe3983e..31bb5b01b1 100644 --- a/dbms/src/Interpreters/PartLog.cpp +++ b/dbms/src/Interpreters/PartLog.cpp @@ -15,7 +15,7 @@ namespace DB { -template <> struct NearestFieldType { using Type = UInt64; }; +template <> struct NearestFieldTypeImpl { using Type = UInt64; }; Block PartLogElement::createBlock() { diff --git a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp index b6a5c18dfe..a29e161126 100644 --- a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp +++ b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp @@ -9,6 +9,7 @@ #include #include #include +#include "TranslateQualifiedNamesVisitor.h" namespace DB { @@ -26,6 +27,9 @@ bool PredicateExpressionsOptimizer::optimize() if 
(!settings.enable_optimize_predicate_expression || !ast_select || !ast_select->tables || ast_select->tables->children.empty()) return false; + if (!ast_select->where_expression && !ast_select->prewhere_expression) + return false; + SubqueriesProjectionColumns all_subquery_projection_columns; getAllSubqueryProjectionColumns(all_subquery_projection_columns); @@ -68,7 +72,7 @@ bool PredicateExpressionsOptimizer::optimizeImpl( ASTPtr inner_predicate; cloneOuterPredicateForInnerPredicate(outer_predicate, projection_columns, database_and_table_with_aliases, inner_predicate); - switch(optimize_kind) + switch (optimize_kind) { case OptimizeKind::NONE: continue; case OptimizeKind::PUSH_TO_WHERE: is_rewrite_subquery |= optimizeExpression(inner_predicate, subquery->where_expression, subquery); continue; @@ -300,15 +304,18 @@ void PredicateExpressionsOptimizer::getSubqueryProjectionColumns(SubqueriesProje ASTs PredicateExpressionsOptimizer::getSelectQueryProjectionColumns(ASTPtr & ast) { + ASTs projection_columns; + auto select_query = static_cast(ast.get()); + /// first should normalize query tree. std::unordered_map aliases; + std::vector tables = getDatabaseAndTables(*select_query, context.getCurrentDatabase()); + + TranslateQualifiedNamesVisitor({}, tables).visit(ast); QueryAliasesVisitor query_aliases_visitor(aliases); query_aliases_visitor.visit(ast); QueryNormalizer(ast, aliases, settings, {}, {}).perform(); - ASTs projection_columns; - auto select_query = static_cast(ast.get()); - for (const auto & projection_column : select_query->select_expression_list->children) { if (typeid_cast(projection_column.get()) || typeid_cast(projection_column.get())) diff --git a/dbms/src/Interpreters/convertFieldToType.cpp b/dbms/src/Interpreters/convertFieldToType.cpp index 7759c2c9b3..ac9936221d 100644 --- a/dbms/src/Interpreters/convertFieldToType.cpp +++ b/dbms/src/Interpreters/convertFieldToType.cpp @@ -31,6 +31,7 @@ namespace ErrorCodes { extern const int LOGICAL_ERROR; extern const int TYPE_MISMATCH; + extern const int TOO_LARGE_STRING_SIZE; } @@ -123,7 +124,7 @@ DayNum stringToDate(const String & s) readDateText(date, in); if (!in.eof()) - throw Exception("String is too long for Date: " + s); + throw Exception("String is too long for Date: " + s, ErrorCodes::TOO_LARGE_STRING_SIZE); return date; } @@ -135,7 +136,7 @@ UInt64 stringToDateTime(const String & s) readDateTimeText(date_time, in); if (!in.eof()) - throw Exception("String is too long for DateTime: " + s); + throw Exception("String is too long for DateTime: " + s, ErrorCodes::TOO_LARGE_STRING_SIZE); return UInt64(date_time); } diff --git a/dbms/src/Interpreters/tests/CMakeLists.txt b/dbms/src/Interpreters/tests/CMakeLists.txt index 01fc72730d..7660527cd8 100644 --- a/dbms/src/Interpreters/tests/CMakeLists.txt +++ b/dbms/src/Interpreters/tests/CMakeLists.txt @@ -1,58 +1,58 @@ add_executable (expression expression.cpp) -target_link_libraries (expression dbms) +target_link_libraries (expression PRIVATE dbms clickhouse_parsers) add_executable (create_query create_query.cpp) -target_link_libraries (create_query dbms) +target_link_libraries (create_query PRIVATE dbms clickhouse_parsers) add_executable (select_query select_query.cpp) -target_link_libraries (select_query clickhouse_storages_system dbms) +target_link_libraries (select_query PRIVATE clickhouse_storages_system dbms clickhouse_common_io) add_executable (aggregate aggregate.cpp) -target_link_libraries (aggregate dbms) +target_link_libraries (aggregate PRIVATE dbms) add_executable (hash_map 
hash_map.cpp) target_include_directories (hash_map SYSTEM BEFORE PRIVATE ${SPARCEHASH_INCLUDE_DIR}) -target_link_libraries (hash_map dbms) +target_link_libraries (hash_map PRIVATE dbms) add_executable (hash_map3 hash_map3.cpp) -target_link_libraries (hash_map3 dbms ${FARMHASH_LIBRARIES} ${METROHASH_LIBRARIES}) +target_link_libraries (hash_map3 PRIVATE dbms ${FARMHASH_LIBRARIES} ${METROHASH_LIBRARIES}) add_executable (hash_map_string hash_map_string.cpp) target_include_directories (hash_map_string SYSTEM BEFORE PRIVATE ${SPARCEHASH_INCLUDE_DIR}) -target_link_libraries (hash_map_string dbms) +target_link_libraries (hash_map_string PRIVATE dbms) add_executable (hash_map_string_2 hash_map_string_2.cpp) -target_link_libraries (hash_map_string_2 dbms) +target_link_libraries (hash_map_string_2 PRIVATE dbms) add_executable (hash_map_string_3 hash_map_string_3.cpp) -target_link_libraries (hash_map_string_3 dbms ${FARMHASH_LIBRARIES} ${METROHASH_LIBRARIES}) +target_link_libraries (hash_map_string_3 PRIVATE dbms ${FARMHASH_LIBRARIES} ${METROHASH_LIBRARIES}) add_executable (hash_map_string_small hash_map_string_small.cpp) target_include_directories (hash_map_string_small SYSTEM BEFORE PRIVATE ${SPARCEHASH_INCLUDE_DIR}) -target_link_libraries (hash_map_string_small dbms) +target_link_libraries (hash_map_string_small PRIVATE dbms) add_executable (two_level_hash_map two_level_hash_map.cpp) target_include_directories (two_level_hash_map SYSTEM BEFORE PRIVATE ${SPARCEHASH_INCLUDE_DIR}) -target_link_libraries (two_level_hash_map dbms) +target_link_libraries (two_level_hash_map PRIVATE dbms) add_executable (compiler_test compiler_test.cpp) -target_link_libraries (compiler_test dbms) +target_link_libraries (compiler_test PRIVATE dbms) add_executable (logical_expressions_optimizer logical_expressions_optimizer.cpp) -target_link_libraries (logical_expressions_optimizer dbms) +target_link_libraries (logical_expressions_optimizer PRIVATE dbms clickhouse_parsers) add_executable (in_join_subqueries_preprocessor in_join_subqueries_preprocessor.cpp) -target_link_libraries (in_join_subqueries_preprocessor dbms) +target_link_libraries (in_join_subqueries_preprocessor PRIVATE dbms clickhouse_parsers) add_check(in_join_subqueries_preprocessor) add_executable (expression_analyzer expression_analyzer.cpp) -target_link_libraries (expression_analyzer dbms clickhouse_storages_system) +target_link_libraries (expression_analyzer PRIVATE dbms clickhouse_storages_system clickhouse_parsers clickhouse_common_io) add_check(expression_analyzer) add_executable (users users.cpp) -target_link_libraries (users dbms ${Boost_FILESYSTEM_LIBRARY}) +target_link_libraries (users PRIVATE dbms clickhouse_common_config ${Boost_FILESYSTEM_LIBRARY}) if (OS_LINUX) add_executable (internal_iotop internal_iotop.cpp) - target_link_libraries (internal_iotop dbms) + target_link_libraries (internal_iotop PRIVATE dbms) endif () diff --git a/dbms/src/Interpreters/tests/expression_analyzer.cpp b/dbms/src/Interpreters/tests/expression_analyzer.cpp index 8fb5fd88c1..fa9cef39ad 100644 --- a/dbms/src/Interpreters/tests/expression_analyzer.cpp +++ b/dbms/src/Interpreters/tests/expression_analyzer.cpp @@ -20,6 +20,14 @@ using namespace DB; +namespace DB +{ + namespace ErrorCodes + { + extern const int SYNTAX_ERROR; + } +} + struct TestEntry { String query; @@ -67,7 +75,7 @@ private: auto text = query.data(); if (ASTPtr ast = tryParseQuery(parser, text, text + query.size(), message, false, "", false, 0)) return ast; - throw Exception(message); + throw 
Exception(message, ErrorCodes::SYNTAX_ERROR); } }; diff --git a/dbms/src/Parsers/ASTAlterQuery.cpp b/dbms/src/Parsers/ASTAlterQuery.cpp index 8869e873c7..63dd634c78 100644 --- a/dbms/src/Parsers/ASTAlterQuery.cpp +++ b/dbms/src/Parsers/ASTAlterQuery.cpp @@ -128,6 +128,16 @@ void ASTAlterCommand::formatImpl( << " " << std::quoted(with_name, '\''); } } + else if (type == ASTAlterCommand::FREEZE_ALL) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "FREEZE"; + + if (!with_name.empty()) + { + settings.ostr << " " << (settings.hilite ? hilite_keyword : "") << "WITH NAME" << (settings.hilite ? hilite_none : "") + << " " << std::quoted(with_name, '\''); + } + } else if (type == ASTAlterCommand::DELETE) { settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "DELETE WHERE " << (settings.hilite ? hilite_none : ""); @@ -141,6 +151,13 @@ void ASTAlterCommand::formatImpl( settings.ostr << (settings.hilite ? hilite_keyword : "") << " WHERE " << (settings.hilite ? hilite_none : ""); predicate->formatImpl(settings, state, frame); } + else if (type == ASTAlterCommand::COMMENT_COLUMN) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "COMMENT COLUMN " << (settings.hilite ? hilite_none : ""); + column->formatImpl(settings, state, frame); + settings.ostr << " " << (settings.hilite ? hilite_none : ""); + comment->formatImpl(settings, state, frame); + } else throw Exception("Unexpected type of ALTER", ErrorCodes::UNEXPECTED_AST_STRUCTURE); } diff --git a/dbms/src/Parsers/ASTAlterQuery.h b/dbms/src/Parsers/ASTAlterQuery.h index 9bfcdae242..c4598c96d1 100644 --- a/dbms/src/Parsers/ASTAlterQuery.h +++ b/dbms/src/Parsers/ASTAlterQuery.h @@ -14,6 +14,7 @@ namespace DB * DROP COLUMN col_drop [FROM PARTITION partition], * MODIFY COLUMN col_name type, * DROP PARTITION partition, + * COMMENT_COLUMN col_name 'comment', */ class ASTAlterCommand : public IAST @@ -25,12 +26,14 @@ public: DROP_COLUMN, MODIFY_COLUMN, MODIFY_PRIMARY_KEY, + COMMENT_COLUMN, DROP_PARTITION, ATTACH_PARTITION, REPLACE_PARTITION, FETCH_PARTITION, FREEZE_PARTITION, + FREEZE_ALL, DELETE, UPDATE, @@ -66,6 +69,9 @@ public: /// A list of expressions of the form `column = expr` for the UPDATE command. ASTPtr update_assignments; + /// A column comment + ASTPtr comment; + bool detach = false; /// true for DETACH PARTITION bool part = false; /// true for ATTACH PART diff --git a/dbms/src/Parsers/ASTColumnDeclaration.h b/dbms/src/Parsers/ASTColumnDeclaration.h index 308e9b6652..870472fcb3 100644 --- a/dbms/src/Parsers/ASTColumnDeclaration.h +++ b/dbms/src/Parsers/ASTColumnDeclaration.h @@ -5,7 +5,7 @@ namespace DB { -/** Name, type, default-specifier, default-expression. +/** Name, type, default-specifier, default-expression, comment-expression. * The type is optional if default-expression is specified. */ class ASTColumnDeclaration : public IAST @@ -15,6 +15,7 @@ public: ASTPtr type; String default_specifier; ASTPtr default_expression; + ASTPtr comment; String getID() const override { return "ColumnDeclaration_" + name; } @@ -35,10 +36,15 @@ public: res->children.push_back(res->default_expression); } + if (comment) + { + res->comment = comment->clone(); + res->children.push_back(res->comment); + } + return res; } -protected: void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override { frame.need_parens = false; @@ -56,6 +62,12 @@ protected: settings.ostr << ' ' << (settings.hilite ? 
hilite_keyword : "") << default_specifier << (settings.hilite ? hilite_none : "") << ' '; default_expression->formatImpl(settings, state, frame); } + + if (comment) + { + settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << "COMMENT" << (settings.hilite ? hilite_none : "") << ' '; + comment->formatImpl(settings, state, frame); + } } }; diff --git a/dbms/src/Parsers/CMakeLists.txt b/dbms/src/Parsers/CMakeLists.txt index 90f314266f..790e151dab 100644 --- a/dbms/src/Parsers/CMakeLists.txt +++ b/dbms/src/Parsers/CMakeLists.txt @@ -1,9 +1,9 @@ include(${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake) add_headers_and_sources(clickhouse_parsers .) add_library(clickhouse_parsers ${LINK_MODE} ${clickhouse_parsers_headers} ${clickhouse_parsers_sources}) -target_link_libraries (clickhouse_parsers clickhouse_common_io) -target_include_directories (clickhouse_parsers PUBLIC ${DBMS_INCLUDE_DIR}) +target_link_libraries(clickhouse_parsers PUBLIC clickhouse_common_io) +target_include_directories(clickhouse_parsers PUBLIC ${DBMS_INCLUDE_DIR}) -if (ENABLE_TESTS) - add_subdirectory (tests) -endif () +if(ENABLE_TESTS) + add_subdirectory(tests) +endif() diff --git a/dbms/src/Parsers/IParser.h b/dbms/src/Parsers/IParser.h index 1bee3005fd..b300e99c4c 100644 --- a/dbms/src/Parsers/IParser.h +++ b/dbms/src/Parsers/IParser.h @@ -86,6 +86,14 @@ public: return true; } + /* The same, but never move the position and do not write the result to node. + */ + bool check_without_moving(Pos pos, Expected & expected) + { + ASTPtr node; + return parse(pos, node, expected); + } + virtual ~IParser() {} }; diff --git a/dbms/src/Parsers/ParserAlterQuery.cpp b/dbms/src/Parsers/ParserAlterQuery.cpp index 56eaddb38e..9ddc295d18 100644 --- a/dbms/src/Parsers/ParserAlterQuery.cpp +++ b/dbms/src/Parsers/ParserAlterQuery.cpp @@ -24,13 +24,15 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected ParserKeyword s_clear_column("CLEAR COLUMN"); ParserKeyword s_modify_column("MODIFY COLUMN"); ParserKeyword s_modify_primary_key("MODIFY PRIMARY KEY"); + ParserKeyword s_comment_column("COMMENT COLUMN"); ParserKeyword s_attach_partition("ATTACH PARTITION"); ParserKeyword s_detach_partition("DETACH PARTITION"); ParserKeyword s_drop_partition("DROP PARTITION"); ParserKeyword s_attach_part("ATTACH PART"); ParserKeyword s_fetch_partition("FETCH PARTITION"); - ParserKeyword s_freeze_partition("FREEZE PARTITION"); + ParserKeyword s_replace_partition("REPLACE PARTITION"); + ParserKeyword s_freeze("FREEZE"); ParserKeyword s_partition("PARTITION"); ParserKeyword s_after("AFTER"); @@ -46,6 +48,7 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected ParserCompoundIdentifier parser_name; ParserStringLiteral parser_string_literal; ParserCompoundColumnDeclaration parser_col_decl; + ParserCompoundColumnDeclaration parser_modify_col_decl(false); ParserPartition parser_partition; ParserExpression parser_exp_elem; ParserList parser_assignment_list( @@ -121,7 +124,7 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected command->type = ASTAlterCommand::ATTACH_PARTITION; } } - else if (ParserKeyword{"REPLACE PARTITION"}.ignore(pos, expected)) + else if (s_replace_partition.ignore(pos, expected)) { if (!parser_partition.parse(pos, command->partition, expected)) return false; @@ -158,10 +161,19 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected command->from = typeid_cast(*ast_from).value.get(); command->type = 
ASTAlterCommand::FETCH_PARTITION; } - else if (s_freeze_partition.ignore(pos, expected)) + else if (s_freeze.ignore(pos, expected)) { - if (!parser_partition.parse(pos, command->partition, expected)) - return false; + if (s_partition.ignore(pos, expected)) + { + if (!parser_partition.parse(pos, command->partition, expected)) + return false; + + command->type = ASTAlterCommand::FREEZE_PARTITION; + } + else + { + command->type = ASTAlterCommand::FREEZE_ALL; + } /// WITH NAME 'name' - place local backup to directory with specified name if (s_with.ignore(pos, expected)) @@ -175,12 +187,10 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected command->with_name = typeid_cast(*ast_with_name).value.get(); } - - command->type = ASTAlterCommand::FREEZE_PARTITION; } else if (s_modify_column.ignore(pos, expected)) { - if (!parser_col_decl.parse(pos, command->col_decl, expected)) + if (!parser_modify_col_decl.parse(pos, command->col_decl, expected)) return false; command->type = ASTAlterCommand::MODIFY_COLUMN; @@ -220,6 +230,16 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected command->type = ASTAlterCommand::UPDATE; } + else if (s_comment_column.ignore(pos, expected)) + { + if (!parser_name.parse(pos, command->column, expected)) + return false; + + if (!parser_string_literal.parse(pos, command->comment, expected)) + return false; + + command->type = ASTAlterCommand::COMMENT_COLUMN; + } else return false; diff --git a/dbms/src/Parsers/ParserAlterQuery.h b/dbms/src/Parsers/ParserAlterQuery.h index 46908ae135..2eecfaf20d 100644 --- a/dbms/src/Parsers/ParserAlterQuery.h +++ b/dbms/src/Parsers/ParserAlterQuery.h @@ -13,9 +13,10 @@ namespace DB * [CLEAR COLUMN col_to_clear [IN PARTITION partition],] * [MODIFY COLUMN col_to_modify type, ...] * [MODIFY PRIMARY KEY (a, b, c...)] + * [COMMENT COLUMN col_name string] * [DROP|DETACH|ATTACH PARTITION|PART partition, ...] * [FETCH PARTITION partition FROM ...] - * [FREEZE PARTITION] + * [FREEZE [PARTITION] [WITH NAME name]] * [DELETE WHERE ...] * [UPDATE col_name = expr, ... WHERE ...] 
*/ diff --git a/dbms/src/Parsers/ParserCreateQuery.h b/dbms/src/Parsers/ParserCreateQuery.h index 75ce5b8053..59a8540b84 100644 --- a/dbms/src/Parsers/ParserCreateQuery.h +++ b/dbms/src/Parsers/ParserCreateQuery.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -95,9 +96,16 @@ protected: template class IParserColumnDeclaration : public IParserBase { +public: + explicit IParserColumnDeclaration(bool require_type_ = true) : require_type(require_type_) + { + } + protected: const char * getName() const { return "column declaration"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected); + + bool require_type = true; }; using ParserColumnDeclaration = IParserColumnDeclaration; @@ -111,7 +119,9 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E ParserKeyword s_default{"DEFAULT"}; ParserKeyword s_materialized{"MATERIALIZED"}; ParserKeyword s_alias{"ALIAS"}; + ParserKeyword s_comment{"COMMENT"}; ParserTernaryOperatorExpression expr_parser; + ParserStringLiteral string_literal_parser; /// mandatory column name ASTPtr name; @@ -119,26 +129,24 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E return false; /** column name should be followed by type name if it - * is not immediately followed by {DEFAULT, MATERIALIZED, ALIAS} + * is not immediately followed by {DEFAULT, MATERIALIZED, ALIAS, COMMENT} */ ASTPtr type; - const auto fallback_pos = pos; - if (!s_default.check(pos, expected) && - !s_materialized.check(pos, expected) && - !s_alias.check(pos, expected)) - { - type_parser.parse(pos, type, expected); - } - else - pos = fallback_pos; - - /// parse {DEFAULT, MATERIALIZED, ALIAS} String default_specifier; ASTPtr default_expression; + ASTPtr comment_expression; + + if (!s_default.check_without_moving(pos, expected) && + !s_materialized.check_without_moving(pos, expected) && + !s_alias.check_without_moving(pos, expected) && + !s_comment.check_without_moving(pos, expected)) + { + if (!type_parser.parse(pos, type, expected)) + return false; + } + Pos pos_before_specifier = pos; - if (s_default.ignore(pos, expected) || - s_materialized.ignore(pos, expected) || - s_alias.ignore(pos, expected)) + if (s_default.ignore(pos, expected) || s_materialized.ignore(pos, expected) || s_alias.ignore(pos, expected)) { default_specifier = Poco::toUpper(std::string{pos_before_specifier->begin, pos_before_specifier->end}); @@ -146,8 +154,17 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E if (!expr_parser.parse(pos, default_expression, expected)) return false; } - else if (!type) - return false; /// reject sole column name without type + + if (require_type && !type && !default_expression) + return false; /// reject column name without type + + + if (s_comment.ignore(pos, expected)) + { + /// should be followed by a string literal + if (!string_literal_parser.parse(pos, comment_expression, expected)) + return false; + } const auto column_declaration = std::make_shared(); node = column_declaration; @@ -165,6 +182,12 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E column_declaration->children.push_back(std::move(default_expression)); } + if (comment_expression) + { + column_declaration->comment = comment_expression; + column_declaration->children.push_back(std::move(comment_expression)); + } + return true; } diff --git a/dbms/src/Parsers/tests/CMakeLists.txt b/dbms/src/Parsers/tests/CMakeLists.txt index 4f87424bf8..3e1d6ae559 100644 --- a/dbms/src/Parsers/tests/CMakeLists.txt +++ 
b/dbms/src/Parsers/tests/CMakeLists.txt @@ -1,11 +1,10 @@ -set(SRCS ) +set(SRCS) -add_executable (lexer lexer.cpp ${SRCS}) -target_link_libraries (lexer clickhouse_parsers) +add_executable(lexer lexer.cpp ${SRCS}) +target_link_libraries(lexer PRIVATE clickhouse_parsers) -add_executable (select_parser select_parser.cpp ${SRCS}) -target_link_libraries (select_parser clickhouse_parsers) - -add_executable (create_parser create_parser.cpp ${SRCS}) -target_link_libraries (create_parser clickhouse_parsers) +add_executable(select_parser select_parser.cpp ${SRCS}) +target_link_libraries(select_parser PRIVATE clickhouse_parsers) +add_executable(create_parser create_parser.cpp ${SRCS}) +target_link_libraries(create_parser PRIVATE clickhouse_parsers) diff --git a/dbms/src/Storages/AlterCommands.cpp b/dbms/src/Storages/AlterCommands.cpp index fefd96262c..abbc277e71 100644 --- a/dbms/src/Storages/AlterCommands.cpp +++ b/dbms/src/Storages/AlterCommands.cpp @@ -82,6 +82,12 @@ std::optional<AlterCommand> AlterCommand::parse(const ASTAlterCommand * command_ command.default_expression = ast_col_decl.default_expression; } + if (ast_col_decl.comment) + { + const auto & ast_comment = typeid_cast<const ASTLiteral &>(*ast_col_decl.comment); + command.comment = ast_comment.value.get<String>(); + } + return command; } else if (command_ast->type == ASTAlterCommand::MODIFY_PRIMARY_KEY) @@ -91,6 +97,16 @@ std::optional<AlterCommand> AlterCommand::parse(const ASTAlterCommand * command_ command.primary_key = command_ast->primary_key; return command; } + else if (command_ast->type == ASTAlterCommand::COMMENT_COLUMN) + { + AlterCommand command; + command.type = COMMENT_COLUMN; + const auto & ast_identifier = typeid_cast<const ASTIdentifier &>(*command_ast->column); + command.column_name = ast_identifier.name; + const auto & ast_comment = typeid_cast<const ASTLiteral &>(*command_ast->comment); + command.comment = ast_comment.value.get<String>(); + return command; + } else return {}; } @@ -179,6 +195,20 @@ void AlterCommand::apply(ColumnsDescription & columns_description) const } else if (type == MODIFY_COLUMN) { + if (!is_mutable()) + { + auto & comments = columns_description.comments; + if (comment.empty()) + { + if (auto it = comments.find(column_name); it != comments.end()) + comments.erase(it); + } + else + columns_description.comments[column_name] = comment; + + return; + } + const auto default_it = columns_description.defaults.find(column_name); const auto had_default_expr = default_it != std::end(columns_description.defaults); const auto old_default_kind = had_default_expr ? default_it->second.kind : ColumnDefaultKind{}; @@ -238,10 +268,24 @@ void AlterCommand::apply(ColumnsDescription & columns_description) const /// This have no relation to changing the list of columns. /// TODO Check that all columns exist, that only columns with constant defaults are added. } + else if (type == COMMENT_COLUMN) + { + + columns_description.comments[column_name] = comment; + } else throw Exception("Wrong parameter type in ALTER query", ErrorCodes::LOGICAL_ERROR); } +bool AlterCommand::is_mutable() const +{ + if (type == COMMENT_COLUMN) + return false; + if (type == MODIFY_COLUMN) + return data_type.get() || default_expression; + // TODO: this may need to be extended + return true; +} void AlterCommands::apply(ColumnsDescription & columns_description) const { @@ -356,6 +400,15 @@ void AlterCommands::validate(const IStorage & table, const Context & context) throw Exception("Wrong column name. 
Cannot find column " + command.column_name + " to drop", ErrorCodes::ILLEGAL_COLUMN); } + else if (command.type == AlterCommand::COMMENT_COLUMN) + { + const auto column_it = std::find_if(std::begin(all_columns), std::end(all_columns), + std::bind(namesEqual, std::cref(command.column_name), std::placeholders::_1)); + if (column_it == std::end(all_columns)) + { + throw Exception{"Wrong column name. Cannot find column " + command.column_name + " to comment", ErrorCodes::ILLEGAL_COLUMN}; + } + } } /** Existing defaulted columns may require default expression extensions with a type conversion, @@ -429,4 +482,15 @@ void AlterCommands::validate(const IStorage & table, const Context & context) } } +bool AlterCommands::is_mutable() const +{ + for (const auto & param : *this) + { + if (param.is_mutable()) + return true; + } + + return false; +} + } diff --git a/dbms/src/Storages/AlterCommands.h b/dbms/src/Storages/AlterCommands.h index 888bd64f03..020fcb33fa 100644 --- a/dbms/src/Storages/AlterCommands.h +++ b/dbms/src/Storages/AlterCommands.h @@ -12,7 +12,8 @@ namespace DB class ASTAlterCommand; -/// Operation from the ALTER query (except for manipulation with PART/PARTITION). Adding Nested columns is not expanded to add individual columns. +/// Operation from the ALTER query (except for manipulation with PART/PARTITION). +/// Adding Nested columns is not expanded to add individual columns. struct AlterCommand { enum Type @@ -21,9 +22,11 @@ struct AlterCommand DROP_COLUMN, MODIFY_COLUMN, MODIFY_PRIMARY_KEY, + COMMENT_COLUMN, + UKNOWN_TYPE, }; - Type type; + Type type = UKNOWN_TYPE; String column_name; @@ -35,6 +38,7 @@ struct AlterCommand ColumnDefaultKind default_kind{}; ASTPtr default_expression{}; + String comment; /// For ADD - after which column to add a new one. If an empty string, add to the end. To add to the beginning now it is impossible. String after_column; @@ -45,15 +49,17 @@ struct AlterCommand AlterCommand() = default; AlterCommand(const Type type, const String & column_name, const DataTypePtr & data_type, const ColumnDefaultKind default_kind, const ASTPtr & default_expression, - const String & after_column = String{}) + const String & after_column = String{}, const String & comment = "") // TODO: разобраться здесь с параметром по умолчанию : type{type}, column_name{column_name}, data_type{data_type}, default_kind{default_kind}, - default_expression{default_expression}, after_column{after_column} + default_expression{default_expression}, comment(comment), after_column{after_column} {} static std::optional parse(const ASTAlterCommand * command); void apply(ColumnsDescription & columns_description) const; + /// Checks that not only metadata touched by that command + bool is_mutable() const; }; class IStorage; @@ -65,6 +71,7 @@ public: void apply(ColumnsDescription & columns_description) const; void validate(const IStorage & table, const Context & context); + bool is_mutable() const; }; } diff --git a/dbms/src/Storages/ColumnDefault.cpp b/dbms/src/Storages/ColumnDefault.cpp index 0ff885ab1e..fdec46958a 100644 --- a/dbms/src/Storages/ColumnDefault.cpp +++ b/dbms/src/Storages/ColumnDefault.cpp @@ -5,6 +5,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + ColumnDefaultKind columnDefaultKindFromString(const std::string & str) { @@ -15,7 +20,7 @@ ColumnDefaultKind columnDefaultKindFromString(const std::string & str) }; const auto it = map.find(str); - return it != std::end(map) ? 
it->second : throw Exception{"Unknown column default specifier: " + str}; + return it != std::end(map) ? it->second : throw Exception{"Unknown column default specifier: " + str, ErrorCodes::LOGICAL_ERROR}; } @@ -28,7 +33,7 @@ std::string toString(const ColumnDefaultKind kind) }; const auto it = map.find(kind); - return it != std::end(map) ? it->second : throw Exception{"Invalid ColumnDefaultKind"}; + return it != std::end(map) ? it->second : throw Exception{"Invalid ColumnDefaultKind", ErrorCodes::LOGICAL_ERROR}; } diff --git a/dbms/src/Storages/ColumnsDescription.cpp b/dbms/src/Storages/ColumnsDescription.cpp index cb67d01a4e..2432f08444 100644 --- a/dbms/src/Storages/ColumnsDescription.cpp +++ b/dbms/src/Storages/ColumnsDescription.cpp @@ -1,4 +1,6 @@ #include +#include +#include #include #include #include @@ -10,12 +12,15 @@ #include #include #include +#include #include #include #include +#include + namespace DB { @@ -75,22 +80,32 @@ String ColumnsDescription::toString() const { for (const auto & column : columns) { - const auto it = defaults.find(column.name); + const auto defaults_it = defaults.find(column.name); + const auto comments_it = comments.find(column.name); writeBackQuotedString(column.name, buf); writeChar(' ', buf); writeText(column.type->getName(), buf); - if (it == std::end(defaults)) - { - writeChar('\n', buf); - continue; - } - else - writeChar('\t', buf); - writeText(DB::toString(it->second.kind), buf); - writeChar('\t', buf); - writeText(queryToString(it->second.expression), buf); + const bool exist_comment = comments_it != std::end(comments); + if (defaults_it != std::end(defaults)) + { + writeChar('\t', buf); + writeText(DB::toString(defaults_it->second.kind), buf); + writeChar('\t', buf); + writeText(queryToString(defaults_it->second.expression), buf); + } + else if (exist_comment) + { + writeChar('\t', buf); + } + + if (exist_comment) + { + writeChar('\t', buf); + writeText(queryToString(ASTLiteral(Field(comments_it->second))), buf); + } + writeChar('\n', buf); } }; @@ -98,10 +113,85 @@ String ColumnsDescription::toString() const write_columns(ordinary); write_columns(materialized); write_columns(aliases); - return buf.str(); } +std::optional parseDefaultInfo(ReadBufferFromString & buf) +{ + if (*buf.position() == '\n') + return {}; + + assertChar('\t', buf); + if (*buf.position() == '\t') + return {}; + + String default_kind_str; + readText(default_kind_str, buf); + const auto default_kind = columnDefaultKindFromString(default_kind_str); + assertChar('\t', buf); + + ParserExpression expr_parser; + String default_expr_str; + readText(default_expr_str, buf); + ASTPtr default_expr = parseQuery(expr_parser, default_expr_str, "default_expression", 0); + return ColumnDefault{default_kind, std::move(default_expr)}; +} + +String parseComment(ReadBufferFromString& buf) +{ + if (*buf.position() == '\n') + return {}; + + assertChar('\t', buf); + ParserStringLiteral string_literal_parser; + String comment_expr_str; + readText(comment_expr_str, buf); + ASTPtr comment_expr = parseQuery(string_literal_parser, comment_expr_str, "comment expression", 0); + return typeid_cast(*comment_expr).value.get(); +} + +void parseColumn(ReadBufferFromString & buf, ColumnsDescription & result, const DataTypeFactory & data_type_factory) +{ + String column_name; + readBackQuotedStringWithSQLStyle(column_name, buf); + assertChar(' ', buf); + + String type_name; + readText(type_name, buf); + auto type = data_type_factory.get(type_name); + if (*buf.position() == '\n') + { + assertChar('\n', buf); 
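For orientation on the ColumnsDescription serialization change around here: the columns metadata stays one line per column and tab-separated — back-quoted name and type, then optionally the default kind and expression, then optionally the comment as a quoted literal, with an empty default slot kept when only a comment is present. A minimal sketch of that layout, using std::ostringstream instead of ClickHouse's WriteBuffer and with simplified quoting (the helper below is illustrative, not part of the patch):

    #include <optional>
    #include <sstream>
    #include <string>
    #include <utility>

    struct ColumnLine
    {
        std::string name;
        std::string type;
        std::optional<std::pair<std::string, std::string>> default_kind_and_expr;
        std::optional<std::string> comment;
    };

    // Sketch only: quoting/escaping is much simpler than the real
    // writeBackQuotedString / ASTLiteral serialization.
    std::string serializeColumnLine(const ColumnLine & col)
    {
        std::ostringstream out;
        out << '`' << col.name << "` " << col.type;
        if (col.default_kind_and_expr)
            out << '\t' << col.default_kind_and_expr->first << '\t' << col.default_kind_and_expr->second;
        else if (col.comment)
            out << '\t';  // keep the default slot empty so the comment lands in its own column
        if (col.comment)
            out << '\t' << '\'' << *col.comment << '\'';
        out << '\n';
        return out.str();
    }
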
+ result.ordinary.emplace_back(column_name, std::move(type)); + return; + } + + auto column_default = parseDefaultInfo(buf); + if (column_default) + { + switch (column_default->kind) + { + case ColumnDefaultKind::Default: + result.ordinary.emplace_back(column_name, std::move(type)); + break; + case ColumnDefaultKind::Materialized: + result.materialized.emplace_back(column_name, std::move(type)); + break; + case ColumnDefaultKind::Alias: + result.aliases.emplace_back(column_name, std::move(type)); + } + + result.defaults.emplace(column_name, std::move(*column_default)); + } + + auto comment = parseComment(buf); + if (!comment.empty()) + { + result.comments.emplace(column_name, std::move(comment)); + } + + assertChar('\n', buf); +} ColumnsDescription ColumnsDescription::parse(const String & str) { @@ -118,47 +208,10 @@ ColumnsDescription ColumnsDescription::parse(const String & str) ColumnsDescription result; for (size_t i = 0; i < count; ++i) { - String column_name; - readBackQuotedStringWithSQLStyle(column_name, buf); - assertChar(' ', buf); - - String type_name; - readText(type_name, buf); - auto type = data_type_factory.get(type_name); - if (*buf.position() == '\n') - { - assertChar('\n', buf); - - result.ordinary.emplace_back(column_name, std::move(type)); - continue; - } - assertChar('\t', buf); - - String default_kind_str; - readText(default_kind_str, buf); - const auto default_kind = columnDefaultKindFromString(default_kind_str); - assertChar('\t', buf); - - String default_expr_str; - readText(default_expr_str, buf); - assertChar('\n', buf); - - const char * begin = default_expr_str.data(); - const auto end = begin + default_expr_str.size(); - ASTPtr default_expr = parseQuery(expr_parser, begin, end, "default expression", 0); - - if (ColumnDefaultKind::Default == default_kind) - result.ordinary.emplace_back(column_name, std::move(type)); - else if (ColumnDefaultKind::Materialized == default_kind) - result.materialized.emplace_back(column_name, std::move(type)); - else if (ColumnDefaultKind::Alias == default_kind) - result.aliases.emplace_back(column_name, std::move(type)); - - result.defaults.emplace(column_name, ColumnDefault{default_kind, default_expr}); + parseColumn(buf, result, data_type_factory); } assertEOF(buf); - return result; } diff --git a/dbms/src/Storages/ColumnsDescription.h b/dbms/src/Storages/ColumnsDescription.h index 288d2712b3..2d35ac403c 100644 --- a/dbms/src/Storages/ColumnsDescription.h +++ b/dbms/src/Storages/ColumnsDescription.h @@ -9,12 +9,16 @@ namespace DB { +/// key-values column_name, column_comment. column_comment should be non empty. 
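The ColumnComments container introduced just below is a plain name-to-comment map that is kept free of empty values, and metadata-only commands (is_mutable() == false) touch nothing else. The update rule used by AlterCommand::apply, written here as a hypothetical free function rather than the inline code of the patch:

    #include <string>
    #include <unordered_map>

    using ColumnComments = std::unordered_map<std::string, std::string>;

    // Sketch of the comment update rule (hypothetical helper; the patch does this inline).
    void setColumnComment(ColumnComments & comments, const std::string & column, const std::string & comment)
    {
        if (comment.empty())
            comments.erase(column);      // an empty comment means "drop the comment"
        else
            comments[column] = comment;  // COMMENT COLUMN / MODIFY COLUMN ... overwrite
    }
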
+using ColumnComments = std::unordered_map; + struct ColumnsDescription { NamesAndTypesList ordinary; NamesAndTypesList materialized; NamesAndTypesList aliases; ColumnDefaults defaults; + ColumnComments comments; ColumnsDescription() = default; @@ -22,11 +26,13 @@ struct ColumnsDescription NamesAndTypesList ordinary_, NamesAndTypesList materialized_, NamesAndTypesList aliases_, - ColumnDefaults defaults_) + ColumnDefaults defaults_, + ColumnComments comments_) : ordinary(std::move(ordinary_)) , materialized(std::move(materialized_)) , aliases(std::move(aliases_)) , defaults(std::move(defaults_)) + , comments(std::move(comments_)) {} explicit ColumnsDescription(NamesAndTypesList ordinary_) : ordinary(std::move(ordinary_)) {} @@ -36,7 +42,8 @@ struct ColumnsDescription return ordinary == other.ordinary && materialized == other.materialized && aliases == other.aliases - && defaults == other.defaults; + && defaults == other.defaults + && comments == other.comments; } bool operator!=(const ColumnsDescription & other) const { return !(*this == other); } diff --git a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp index 39e78ffd36..319b4c8958 100644 --- a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp +++ b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp @@ -54,6 +54,7 @@ namespace ErrorCodes { extern const int TIMEOUT_EXCEEDED; extern const int TYPE_MISMATCH; + extern const int CANNOT_LINK; } @@ -196,7 +197,8 @@ ThreadPool::Job DistributedBlockOutputStream::runWritingJob(DistributedBlockOutp auto thread_group = CurrentThread::getGroup(); return [this, thread_group, &job, ¤t_block]() { - CurrentThread::attachToIfDetached(thread_group); + if (thread_group) + CurrentThread::attachToIfDetached(thread_group); setThreadName("DistrOutStrProc"); ++job.blocks_started; @@ -556,7 +558,7 @@ void DistributedBlockOutputStream::writeToShard(const Block & block, const std:: } if (link(first_file_tmp_path.data(), block_file_path.data())) - throwFromErrno("Could not link " + block_file_path + " to " + first_file_tmp_path); + throwFromErrno("Could not link " + block_file_path + " to " + first_file_tmp_path, ErrorCodes::CANNOT_LINK); } /** remove the temporary file, enabling the OS to reclaim inode after all threads diff --git a/dbms/src/Storages/IStorage.h b/dbms/src/Storages/IStorage.h index a0a6103558..65d40d82c9 100644 --- a/dbms/src/Storages/IStorage.h +++ b/dbms/src/Storages/IStorage.h @@ -3,7 +3,10 @@ #include #include #include +#include #include +#include +#include #include #include #include @@ -18,6 +21,7 @@ namespace DB namespace ErrorCodes { extern const int TABLE_IS_DROPPED; + extern const int NOT_IMPLEMENTED; } class Context; @@ -44,6 +48,7 @@ struct Settings; class AlterCommands; class MutationCommands; +class PartitionCommands; /** Does not allow changing the table description (including rename and delete the table). @@ -233,49 +238,27 @@ public: * This method must fully execute the ALTER query, taking care of the locks itself. * To update the table metadata on disk, this method should call InterpreterAlterQuery::updateMetadata. 
*/ - virtual void alter(const AlterCommands & /*params*/, const String & /*database_name*/, const String & /*table_name*/, const Context & /*context*/) + virtual void alter(const AlterCommands & params, const String & database_name, const String & table_name, const Context & context) { - throw Exception("Method alter is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); + for (const auto & param : params) + { + if (param.is_mutable()) + throw Exception("Method alter supports only change comment of column for storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); + } + + auto lock = lockStructureForAlter(__PRETTY_FUNCTION__); + auto new_columns = getColumns(); + params.apply(new_columns); + context.getDatabase(database_name)->alterTable(context, table_name, new_columns, {}); + setColumns(std::move(new_columns)); } - /** Execute CLEAR COLUMN ... IN PARTITION query which removes column from given partition. */ - virtual void clearColumnInPartition(const ASTPtr & /*partition*/, const Field & /*column_name*/, const Context & /*context*/) - { - throw Exception("Method dropColumnFromPartition is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); - } - - /** Execute ALTER TABLE dst.table REPLACE(ATTACH) PARTITION partition FROM src.table */ - virtual void replacePartitionFrom(const StoragePtr & /*source_table*/, const ASTPtr & /*partition*/, bool /*replace*/, const Context &) - { - throw Exception("Method replacePartitionFrom is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); - } - - /** Run the query (DROP|DETACH) PARTITION. + /** ALTER tables with regard to its partitions. + * Should handle locks for each command on its own. */ - virtual void dropPartition(const ASTPtr & /*query*/, const ASTPtr & /*partition*/, bool /*detach*/, const Context & /*context*/) + virtual void partition(const ASTPtr & /* query */, const PartitionCommands & /* commands */, const Context & /* context */) { - throw Exception("Method dropPartition is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); - } - - /** Run the ATTACH request (PART|PARTITION). - */ - virtual void attachPartition(const ASTPtr & /*partition*/, bool /*part*/, const Context & /*context*/) - { - throw Exception("Method attachPartition is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); - } - - /** Run the FETCH PARTITION query. - */ - virtual void fetchPartition(const ASTPtr & /*partition*/, const String & /*from*/, const Context & /*context*/) - { - throw Exception("Method fetchPartition is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); - } - - /** Run the FREEZE PARTITION request. That is, create a local backup (snapshot) of data using the `localBackup` function (see localBackup.h) - */ - virtual void freezePartition(const ASTPtr & /*partition*/, const String & /*with_name*/, const Context & /*context*/) - { - throw Exception("Method freezePartition is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); + throw Exception("Partition operations are not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); } /** Perform any background work. For example, combining parts in a MergeTree type table. 
@@ -325,7 +308,7 @@ public: virtual bool mayBenefitFromIndexForIn(const ASTPtr & /* left_in_operand */) const { return false; } /// Checks validity of the data - virtual bool checkData() const { throw DB::Exception("Check query is not supported for " + getName() + " storage"); } + virtual bool checkData() const { throw Exception("Check query is not supported for " + getName() + " storage", ErrorCodes::NOT_IMPLEMENTED); } /// Checks that table could be dropped right now /// Otherwise - throws an exception with detailed information. @@ -349,6 +332,24 @@ public: /// Returns primary expression for storage or nullptr if there is no. virtual ASTPtr getPrimaryExpression() const { return nullptr; } + /// Returns partition expression for storage or nullptr if there is no. + virtual ASTPtr getPartitionExpression() const { return nullptr; } + + /// Returns secondary expression for storage or nullptr if there is no. + virtual ASTPtr getOrderExpression() const { return nullptr; } + + /// Returns sampling key names for storage or empty vector if there is no. + virtual Names getSamplingExpressionNames() const { return {}; } + + /// Returns primary key names for storage or empty vector if there is no. + virtual Names getPrimaryExpressionNames() const { return {}; } + + /// Returns partition key names for storage or empty vector if there is no. + virtual Names getPartitionExpressionNames() const { return {}; } + + /// Returns order key names for storage or empty vector if there is no. + virtual Names getOrderExpressionNames() const { return {}; } + using ITableDeclaration::ITableDeclaration; using std::enable_shared_from_this::shared_from_this; diff --git a/dbms/src/Storages/Kafka/StorageKafka.cpp b/dbms/src/Storages/Kafka/StorageKafka.cpp index f2022b3377..03f45382da 100644 --- a/dbms/src/Storages/Kafka/StorageKafka.cpp +++ b/dbms/src/Storages/Kafka/StorageKafka.cpp @@ -79,7 +79,7 @@ class ReadBufferFromKafkaConsumer : public ReadBuffer } // Process next buffered message - rd_kafka_message_t * msg = rd_kafka_consumer_poll(consumer, READ_POLL_MS); + rd_kafka_message_t * msg = rd_kafka_consumer_poll(consumer, READ_POLL_MS); // XXX: use RAII. if (msg == nullptr) return false; diff --git a/dbms/src/Storages/MergeTree/DataPartsExchange.cpp b/dbms/src/Storages/MergeTree/DataPartsExchange.cpp index f54e74fa5f..ab1d28ee65 100644 --- a/dbms/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/dbms/src/Storages/MergeTree/DataPartsExchange.cpp @@ -25,6 +25,7 @@ namespace ErrorCodes extern const int ABORTED; extern const int BAD_SIZE_OF_FILE_IN_DATA_PART; extern const int CANNOT_WRITE_TO_OSTREAM; + extern const int CHECKSUM_DOESNT_MATCH; extern const int UNKNOWN_TABLE; } @@ -185,7 +186,15 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPart( creds.setPassword(password); } - ReadWriteBufferFromHTTP in{uri, Poco::Net::HTTPRequest::HTTP_POST, {}, timeouts, creds}; + PooledReadWriteBufferFromHTTP in{ + uri, + Poco::Net::HTTPRequest::HTTP_POST, + {}, + timeouts, + creds, + DBMS_DEFAULT_BUFFER_SIZE, + data.settings.replicated_max_parallel_fetches_for_host + }; static const String TMP_PREFIX = "tmp_fetch_"; String tmp_prefix = tmp_prefix_.empty() ? 
TMP_PREFIX : tmp_prefix_; @@ -232,7 +241,8 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPart( readPODBinary(expected_hash, in); if (expected_hash != hashing_out.getHash()) - throw Exception("Checksum mismatch for file " + absolute_part_path + file_name + " transferred from " + replica_path); + throw Exception("Checksum mismatch for file " + absolute_part_path + file_name + " transferred from " + replica_path, + ErrorCodes::CHECKSUM_DOESNT_MATCH); if (file_name != "checksums.txt" && file_name != "columns.txt") diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 524b8bfe8b..4905c135f4 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -79,6 +79,9 @@ namespace ErrorCodes extern const int TOO_MANY_PARTS; extern const int INCOMPATIBLE_COLUMNS; extern const int CANNOT_UPDATE_COLUMN; + extern const int CANNOT_ALLOCATE_MEMORY; + extern const int CANNOT_MUNMAP; + extern const int CANNOT_MREMAP; } @@ -190,7 +193,7 @@ static void checkKeyExpression(const ExpressionActions & expr, const Block & sam for (const ExpressionAction & action : expr.getActions()) { if (action.type == ExpressionAction::ARRAY_JOIN) - throw Exception(key_name + " key cannot contain array joins"); + throw Exception(key_name + " key cannot contain array joins", ErrorCodes::ILLEGAL_COLUMN); if (action.type == ExpressionAction::APPLY_FUNCTION) { @@ -343,7 +346,7 @@ void MergeTreeData::MergingParams::check(const NamesAndTypesList & columns) cons } } if (miss_column) - throw Exception("Sign column " + sign_column + " does not exist in table declaration."); + throw Exception("Sign column " + sign_column + " does not exist in table declaration.", ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); }; /// that if the version_column column is needed, it exists and is of unsigned integer type. @@ -372,7 +375,7 @@ void MergeTreeData::MergingParams::check(const NamesAndTypesList & columns) cons } } if (miss_column) - throw Exception("Version column " + version_column + " does not exist in table declaration."); + throw Exception("Version column " + version_column + " does not exist in table declaration.", ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); }; if (mode == MergingParams::Collapsing) @@ -389,7 +392,7 @@ void MergeTreeData::MergingParams::check(const NamesAndTypesList & columns) cons }; if (columns.end() == std::find_if(columns.begin(), columns.end(), check_column_to_sum_exists)) throw Exception( - "Column " + column_to_sum + " listed in columns to sum does not exist in table declaration."); + "Column " + column_to_sum + " listed in columns to sum does not exist in table declaration.", ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); } } @@ -477,7 +480,10 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) /// Don't count the part as broken if there is not enough memory to load it. /// In fact, there can be many similar situations. /// But it is OK, because there is a safety guard against deleting too many parts. 
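A side note on the loadDataParts() change just below: the set of error codes treated as "resource exhaustion, rethrow" rather than "part is broken" grows from MEMORY_LIMIT_EXCEEDED alone to also cover allocation and mmap failures. Expressed as a standalone predicate (a hypothetical helper with placeholder code values; the patch keeps the condition inline):

    // Assumed numeric values are placeholders; ClickHouse defines the real ones in ErrorCodes.
    namespace ErrorCodes
    {
        const int MEMORY_LIMIT_EXCEEDED = 1;
        const int CANNOT_ALLOCATE_MEMORY = 2;
        const int CANNOT_MUNMAP = 3;
        const int CANNOT_MREMAP = 4;
    }

    /// True for failures caused by memory pressure rather than on-disk corruption:
    /// such parts must not be counted as broken (and later deleted).
    bool isResourceExhaustionError(int code)
    {
        return code == ErrorCodes::MEMORY_LIMIT_EXCEEDED
            || code == ErrorCodes::CANNOT_ALLOCATE_MEMORY
            || code == ErrorCodes::CANNOT_MUNMAP
            || code == ErrorCodes::CANNOT_MREMAP;
    }
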
- if (e.code() == ErrorCodes::MEMORY_LIMIT_EXCEEDED) + if (e.code() == ErrorCodes::MEMORY_LIMIT_EXCEEDED + || e.code() == ErrorCodes::CANNOT_ALLOCATE_MEMORY + || e.code() == ErrorCodes::CANNOT_MUNMAP + || e.code() == ErrorCodes::CANNOT_MREMAP) throw; broken = true; @@ -913,6 +919,11 @@ void MergeTreeData::checkAlter(const AlterCommands & commands) for (const AlterCommand & command : commands) { + if (!command.is_mutable()) + { + continue; + } + if (columns_alter_forbidden.count(command.column_name)) throw Exception("trying to ALTER key column " + command.column_name, ErrorCodes::ILLEGAL_COLUMN); @@ -1279,6 +1290,11 @@ MergeTreeData::AlterDataPartTransactionPtr MergeTreeData::alterDataPart( return transaction; } +void MergeTreeData::freezeAll(const String & with_name, const Context & context) +{ + freezePartitionsByMatcher([] (const DataPartPtr &){ return true; }, with_name, context); +} + void MergeTreeData::AlterDataPartTransaction::commit() { if (!data_part) @@ -2064,44 +2080,17 @@ void MergeTreeData::freezePartition(const ASTPtr & partition_ast, const String & else LOG_DEBUG(log, "Freezing parts with partition ID " + partition_id); - String clickhouse_path = Poco::Path(context.getPath()).makeAbsolute().toString(); - String shadow_path = clickhouse_path + "shadow/"; - Poco::File(shadow_path).createDirectories(); - String backup_path = shadow_path - + (!with_name.empty() - ? escapeForFileName(with_name) - : toString(Increment(shadow_path + "increment.txt").get(true))) - + "/"; - LOG_DEBUG(log, "Snapshot will be placed at " + backup_path); - - /// Acquire a snapshot of active data parts to prevent removing while doing backup. - const auto data_parts = getDataParts(); - - size_t parts_processed = 0; - for (const auto & part : data_parts) - { - if (prefix) + freezePartitionsByMatcher( + [&prefix, &partition_id](const DataPartPtr & part) { - if (!startsWith(part->info.partition_id, *prefix)) - continue; - } - else if (part->info.partition_id != partition_id) - continue; - - LOG_DEBUG(log, "Freezing part " << part->name); - - String part_absolute_path = Poco::Path(part->getFullPath()).absolute().toString(); - if (!startsWith(part_absolute_path, clickhouse_path)) - throw Exception("Part path " + part_absolute_path + " is not inside " + clickhouse_path, ErrorCodes::LOGICAL_ERROR); - - String backup_part_absolute_path = part_absolute_path; - backup_part_absolute_path.replace(0, clickhouse_path.size(), backup_path); - localBackup(part_absolute_path, backup_part_absolute_path); - ++parts_processed; - } - - LOG_DEBUG(log, "Freezed " << parts_processed << " parts"); + if (prefix) + return startsWith(part->info.partition_id, *prefix); + else + return part->info.partition_id == partition_id; + }, + with_name, + context); } size_t MergeTreeData::getPartitionSize(const std::string & partition_id) const @@ -2442,4 +2431,41 @@ MergeTreeData::MutableDataPartPtr MergeTreeData::cloneAndLoadDataPart(const Merg return dst_data_part; } +void MergeTreeData::freezePartitionsByMatcher(MatcherFn matcher, const String & with_name, const Context & context) +{ + String clickhouse_path = Poco::Path(context.getPath()).makeAbsolute().toString(); + String shadow_path = clickhouse_path + "shadow/"; + Poco::File(shadow_path).createDirectories(); + String backup_path = shadow_path + + (!with_name.empty() + ? 
escapeForFileName(with_name) + : toString(Increment(shadow_path + "increment.txt").get(true))) + + "/"; + + LOG_DEBUG(log, "Snapshot will be placed at " + backup_path); + + /// Acquire a snapshot of active data parts to prevent removing while doing backup. + const auto data_parts = getDataParts(); + + size_t parts_processed = 0; + for (const auto & part : data_parts) + { + if (!matcher(part)) + continue; + + LOG_DEBUG(log, "Freezing part " << part->name); + + String part_absolute_path = Poco::Path(part->getFullPath()).absolute().toString(); + if (!startsWith(part_absolute_path, clickhouse_path)) + throw Exception("Part path " + part_absolute_path + " is not inside " + clickhouse_path, ErrorCodes::LOGICAL_ERROR); + + String backup_part_absolute_path = part_absolute_path; + backup_part_absolute_path.replace(0, clickhouse_path.size(), backup_path); + localBackup(part_absolute_path, backup_part_absolute_path); + ++parts_processed; + } + + LOG_DEBUG(log, "Freezed " << parts_processed << " parts"); +} + } diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index 5ad413f21f..034719828b 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -192,7 +192,7 @@ public: { rollback(); } - catch(...) + catch (...) { tryLogCurrentException("~MergeTreeData::Transaction"); } @@ -480,6 +480,9 @@ public: const ASTPtr & new_primary_key, bool skip_sanity_checks); + /// Freezes all parts. + void freezeAll(const String & with_name, const Context & context); + /// Should be called if part data is suspected to be corrupted. void reportBrokenPart(const String & name) const { @@ -720,6 +723,10 @@ private: /// Checks whether the column is in the primary key, possibly wrapped in a chain of functions with single argument. bool isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(const ASTPtr & node) const; + + /// Common part for |freezePartition()| and |freezeAll()|. 
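The freezePartitionsByMatcher() refactoring above keeps a single snapshot loop and lets FREEZE PARTITION and the new freeze-all path differ only in the predicate they pass in. A trimmed-down illustration of that shape, with part ids reduced to plain strings and the hard-link/backup work elided:

    #include <functional>
    #include <iostream>
    #include <string>
    #include <vector>

    using PartIds = std::vector<std::string>;
    using MatcherFn = std::function<bool(const std::string & partition_id)>;

    // One backup loop shared by both entry points.
    void freezePartsByMatcher(const PartIds & parts, const MatcherFn & matcher)
    {
        size_t processed = 0;
        for (const auto & id : parts)
            if (matcher(id))
                ++processed;  // the real code hard-links the part into shadow/<name>/ here
        std::cout << "Froze " << processed << " parts\n";
    }

    void freezePartition(const PartIds & parts, const std::string & partition_id)
    {
        freezePartsByMatcher(parts, [&](const std::string & id) { return id == partition_id; });
    }

    void freezeAll(const PartIds & parts)
    {
        freezePartsByMatcher(parts, [](const std::string &) { return true; });
    }
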
+ using MatcherFn = std::function; + void freezePartitionsByMatcher(MatcherFn matcher, const String & with_name, const Context & context); }; } diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 0446506e9d..af1a668565 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -19,6 +19,11 @@ namespace ProfileEvents namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + namespace { @@ -149,7 +154,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa DayNum max_month = date_lut.toFirstDayNumOfMonth(DayNum(max_date)); if (min_month != max_month) - throw Exception("Logical error: part spans more than one month."); + throw Exception("Logical error: part spans more than one month.", ErrorCodes::LOGICAL_ERROR); part_name = new_part_info.getPartNameV0(min_date, max_date); } diff --git a/dbms/src/Storages/MergeTree/MergeTreeSettings.h b/dbms/src/Storages/MergeTree/MergeTreeSettings.h index f7fa9bf670..e135cf24a1 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeSettings.h +++ b/dbms/src/Storages/MergeTree/MergeTreeSettings.h @@ -91,6 +91,10 @@ struct MergeTreeSettings /** Limit parallel fetches */ \ M(SettingUInt64, replicated_max_parallel_fetches, 0) \ M(SettingUInt64, replicated_max_parallel_fetches_for_table, 0) \ + \ + /** Limit parallel fetches from endpoint (actually pool size) */ \ + M(SettingUInt64, replicated_max_parallel_fetches_for_host, DEFAULT_COUNT_OF_HTTP_CONNECTIONS_PER_ENDPOINT)\ + \ /** Limit parallel sends */ \ M(SettingUInt64, replicated_max_parallel_sends, 0) \ M(SettingUInt64, replicated_max_parallel_sends_for_table, 0) \ diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp index 3f242eb975..cdfb71b572 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp @@ -27,6 +27,8 @@ namespace ErrorCodes extern const int UNKNOWN_STATUS_OF_INSERT; extern const int INSERT_WAS_DEDUPLICATED; extern const int KEEPER_EXCEPTION; + extern const int TIMEOUT_EXCEEDED; + extern const int NO_ACTIVE_REPLICAS; } @@ -389,14 +391,14 @@ void ReplicatedMergeTreeBlockOutputStream::commitPart(zkutil::ZooKeeperPtr & zoo break; if (!event->tryWait(quorum_timeout_ms)) - throw Exception("Timeout while waiting for quorum"); + throw Exception("Timeout while waiting for quorum", ErrorCodes::TIMEOUT_EXCEEDED); } /// And what if it is possible that the current replica at this time has ceased to be active and the quorum is marked as failed and deleted? String value; if (!zookeeper->tryGet(storage.replica_path + "/is_active", value, nullptr) || value != quorum_info.is_active_node_value) - throw Exception("Replica become inactive while waiting for quorum"); + throw Exception("Replica become inactive while waiting for quorum", ErrorCodes::NO_ACTIVE_REPLICAS); } catch (...) 
{ diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp index b65ba8868e..fb7a2ad073 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp @@ -14,6 +14,11 @@ namespace ProfileEvents namespace DB { +namespace ErrorCodes +{ + extern const int TABLE_DIFFERS_TOO_MUCH; +} + static const auto PART_CHECK_ERROR_SLEEP_MS = 5 * 1000; @@ -213,7 +218,7 @@ void ReplicatedMergeTreePartCheckThread::checkPart(const String & part_name) auto zk_columns = NamesAndTypesList::parse( zookeeper->get(storage.replica_path + "/parts/" + part_name + "/columns")); if (part->columns != zk_columns) - throw Exception("Columns of local part " + part_name + " are different from ZooKeeper"); + throw Exception("Columns of local part " + part_name + " are different from ZooKeeper", ErrorCodes::TABLE_DIFFERS_TOO_MUCH); checkDataPart( storage.data.getFullPath() + part_name, diff --git a/dbms/src/Storages/MergeTree/checkDataPart.cpp b/dbms/src/Storages/MergeTree/checkDataPart.cpp index a978bdf34f..eac9145692 100644 --- a/dbms/src/Storages/MergeTree/checkDataPart.cpp +++ b/dbms/src/Storages/MergeTree/checkDataPart.cpp @@ -21,6 +21,7 @@ namespace DB namespace ErrorCodes { extern const int CORRUPTED_DATA; + extern const int LOGICAL_ERROR; extern const int INCORRECT_MARK; extern const int EMPTY_LIST_OF_COLUMNS_PASSED; } @@ -285,7 +286,7 @@ MergeTreeData::DataPart::Checksums checkDataPart( String file_name = IDataType::getFileNameForStream(name_type.name, substream_path); auto stream_it = streams.find(file_name); if (stream_it == streams.end()) - throw Exception("Logical error: cannot find stream " + file_name); + throw Exception("Logical error: cannot find stream " + file_name, ErrorCodes::LOGICAL_ERROR); return &stream_it->second.uncompressed_hashing_buf; }; @@ -319,7 +320,7 @@ MergeTreeData::DataPart::Checksums checkDataPart( String file_name = IDataType::getFileNameForStream(name_type.name, substream_path); auto stream_it = streams.find(file_name); if (stream_it == streams.end()) - throw Exception("Logical error: cannot find stream " + file_name); + throw Exception("Logical error: cannot find stream " + file_name, ErrorCodes::LOGICAL_ERROR); stream_it->second.assertEnd(); stream_it->second.saveChecksums(checksums_data); diff --git a/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp b/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp index a875b74376..ebce8badca 100644 --- a/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -205,6 +205,8 @@ static void setGraphitePatternsFromConfig(const Context & context, static String getMergeTreeVerboseHelp(bool is_extended_syntax) { + using namespace std::string_literals; + String help = R"( MergeTree is a family of storage engines. @@ -248,14 +250,14 @@ Next parameter is index (primary key) granularity. Good value is 8192. You have )"; help += R"( -For the Collapsing mode, the last parameter is the name of a sign column - a special column that is used to 'collapse' rows with the same primary key while merging. +For the Collapsing mode, the )" + (is_extended_syntax ? "only"s : "last"s) + R"( parameter is the name of a sign column - a special column that is used to 'collapse' rows with the same primary key while merging. -For the Summing mode, the optional last parameter is a list of columns to sum while merging. 
This list is passed in round brackets, like (PageViews, Cost). +For the Summing mode, the optional )" + (is_extended_syntax ? ""s : "last "s) + R"(parameter is a list of columns to sum while merging. This list is passed in round brackets, like (PageViews, Cost). If this parameter is omitted, the storage will sum all numeric columns except columns participating in the primary key. -For the Replacing mode, the optional last parameter is the name of a 'version' column. While merging, for all rows with the same primary key, only one row is selected: the last row, if the version column was not specified, or the last row with the maximum version value, if specified. +For the Replacing mode, the optional )" + (is_extended_syntax ? ""s : "last "s) + R"(parameter is the name of a 'version' column. While merging, for all rows with the same primary key, only one row is selected: the last row, if the version column was not specified, or the last row with the maximum version value, if specified. -For VersionedCollapsing mode, the last 2 parameters are the name of a sign column and the name of a 'version' column. Version column must be in primary key. While merging, a pair of rows with the same primary key and different sign may collapse. +For VersionedCollapsing mode, the )" + (is_extended_syntax ? ""s : "last "s) + R"(2 parameters are the name of a sign column and the name of a 'version' column. Version column must be in primary key. While merging, a pair of rows with the same primary key and different sign may collapse. )"; if (is_extended_syntax) diff --git a/dbms/src/Storages/PartitionCommands.cpp b/dbms/src/Storages/PartitionCommands.cpp index e7daabb246..b6ea7794cb 100644 --- a/dbms/src/Storages/PartitionCommands.cpp +++ b/dbms/src/Storages/PartitionCommands.cpp @@ -61,7 +61,7 @@ std::optional PartitionCommand::parse(const ASTAlterCommand * else if (command_ast->type == ASTAlterCommand::DROP_COLUMN && command_ast->partition) { if (!command_ast->clear_column) - throw Exception("Can't DROP COLUMN from partition. It is possible only CLEAR COLUMN in partition", ErrorCodes::BAD_ARGUMENTS); + throw Exception("Can't DROP COLUMN from partition. 
It is possible only to CLEAR COLUMN in partition", ErrorCodes::BAD_ARGUMENTS); PartitionCommand res; res.type = CLEAR_COLUMN; @@ -70,6 +70,13 @@ std::optional PartitionCommand::parse(const ASTAlterCommand * res.column_name = column_name; return res; } + else if (command_ast->type == ASTAlterCommand::FREEZE_ALL) + { + PartitionCommand command; + command.type = PartitionCommand::FREEZE_ALL_PARTITIONS; + command.with_name = command_ast->with_name; + return command; + } else return {}; } diff --git a/dbms/src/Storages/PartitionCommands.h b/dbms/src/Storages/PartitionCommands.h index 2f2479eccc..987af8baef 100644 --- a/dbms/src/Storages/PartitionCommands.h +++ b/dbms/src/Storages/PartitionCommands.h @@ -17,12 +17,13 @@ struct PartitionCommand { enum Type { - DROP_PARTITION, ATTACH_PARTITION, - REPLACE_PARTITION, - FETCH_PARTITION, - FREEZE_PARTITION, CLEAR_COLUMN, + DROP_PARTITION, + FETCH_PARTITION, + FREEZE_ALL_PARTITIONS, + FREEZE_PARTITION, + REPLACE_PARTITION, }; Type type; diff --git a/dbms/src/Storages/StorageDictionary.cpp b/dbms/src/Storages/StorageDictionary.cpp index 78fac95a90..450a0307e1 100644 --- a/dbms/src/Storages/StorageDictionary.cpp +++ b/dbms/src/Storages/StorageDictionary.cpp @@ -19,6 +19,7 @@ namespace DB namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int THERE_IS_NO_COLUMN; } @@ -80,7 +81,7 @@ void StorageDictionary::checkNamesAndTypesCompatibleWithDictionary(const Diction message += " in dictionary " + dictionary_name + ". "; message += "There are only columns "; message += generateNamesAndTypesDescription(dictionary_names_and_types.begin(), dictionary_names_and_types.end()); - throw Exception(message); + throw Exception(message, ErrorCodes::THERE_IS_NO_COLUMN); } } } diff --git a/dbms/src/Storages/StorageMaterializedView.cpp b/dbms/src/Storages/StorageMaterializedView.cpp index b83c49c3a2..e1af257687 100644 --- a/dbms/src/Storages/StorageMaterializedView.cpp +++ b/dbms/src/Storages/StorageMaterializedView.cpp @@ -1,3 +1,5 @@ +#include + #include #include #include @@ -9,7 +11,6 @@ #include #include -#include #include #include @@ -239,28 +240,10 @@ bool StorageMaterializedView::optimize(const ASTPtr & query, const ASTPtr & part return getTargetTable()->optimize(query, partition, final, deduplicate, context); } -void StorageMaterializedView::dropPartition(const ASTPtr & query, const ASTPtr & partition, bool detach, const Context & context) +void StorageMaterializedView::partition(const ASTPtr & query, const PartitionCommands &commands, const Context &context) { checkStatementCanBeForwarded(); - getTargetTable()->dropPartition(query, partition, detach, context); -} - -void StorageMaterializedView::clearColumnInPartition(const ASTPtr & partition, const Field & column_name, const Context & context) -{ - checkStatementCanBeForwarded(); - getTargetTable()->clearColumnInPartition(partition, column_name, context); -} - -void StorageMaterializedView::attachPartition(const ASTPtr & partition, bool part, const Context & context) -{ - checkStatementCanBeForwarded(); - getTargetTable()->attachPartition(partition, part, context); -} - -void StorageMaterializedView::freezePartition(const ASTPtr & partition, const String & with_name, const Context & context) -{ - checkStatementCanBeForwarded(); - getTargetTable()->freezePartition(partition, with_name, context); + getTargetTable()->partition(query, commands, context); } void StorageMaterializedView::mutate(const MutationCommands & commands, const Context & context) diff --git 
a/dbms/src/Storages/StorageMaterializedView.h b/dbms/src/Storages/StorageMaterializedView.h index c20757b802..d1f463151e 100644 --- a/dbms/src/Storages/StorageMaterializedView.h +++ b/dbms/src/Storages/StorageMaterializedView.h @@ -8,7 +8,7 @@ namespace DB { -class IAST; +class IAST; // XXX: should include full class - for proper use inside inline methods using ASTPtr = std::shared_ptr; @@ -35,10 +35,8 @@ public: bool optimize(const ASTPtr & query, const ASTPtr & partition, bool final, bool deduplicate, const Context & context) override; - void dropPartition(const ASTPtr & query, const ASTPtr & partition, bool detach, const Context & context) override; - void clearColumnInPartition(const ASTPtr & partition, const Field & column_name, const Context & context) override; - void attachPartition(const ASTPtr & partition, bool part, const Context & context) override; - void freezePartition(const ASTPtr & partition, const String & with_name, const Context & context) override; + void partition(const ASTPtr & query, const PartitionCommands & commands, const Context & context) override; + void mutate(const MutationCommands & commands, const Context & context) override; void shutdown() override; diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index 0e926218a0..7ed871e23c 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -188,6 +188,16 @@ void StorageMergeTree::alter( const String & table_name, const Context & context) { + if (!params.is_mutable()) + { + auto table_soft_lock = lockStructureForAlter(__PRETTY_FUNCTION__); + auto new_columns = getColumns(); + params.apply(new_columns); + context.getDatabase(database_name)->alterTable(context, table_name, new_columns, {}); + setColumns(std::move(new_columns)); + return; + } + /// NOTE: Here, as in ReplicatedMergeTree, you can do ALTER which does not block the writing of data for a long time. auto merge_blocker = merger_mutator.actions_blocker.cancel(); @@ -771,8 +781,55 @@ bool StorageMergeTree::optimize( return true; } +void StorageMergeTree::partition(const ASTPtr & query, const PartitionCommands & commands, const Context & context) +{ + for (const PartitionCommand & command : commands) + { + switch (command.type) + { + case PartitionCommand::DROP_PARTITION: + checkPartitionCanBeDropped(command.partition); + dropPartition(command.partition, command.detach, context); + break; -void StorageMergeTree::dropPartition(const ASTPtr & /*query*/, const ASTPtr & partition, bool detach, const Context & context) + case PartitionCommand::ATTACH_PARTITION: + attachPartition(command.partition, command.part, context); + break; + + case PartitionCommand::REPLACE_PARTITION: + { + checkPartitionCanBeDropped(command.partition); + String from_database = command.from_database.empty() ? 
context.getCurrentDatabase() : command.from_database; + auto from_storage = context.getTable(from_database, command.from_table); + replacePartitionFrom(from_storage, command.partition, command.replace, context); + } + break; + + case PartitionCommand::FREEZE_PARTITION: + { + auto lock = lockStructure(false, __PRETTY_FUNCTION__); + data.freezePartition(command.partition, command.with_name, context); + } + break; + + case PartitionCommand::CLEAR_COLUMN: + clearColumnInPartition(command.partition, command.column_name, context); + break; + + case PartitionCommand::FREEZE_ALL_PARTITIONS: + { + auto lock = lockStructure(false, __PRETTY_FUNCTION__); + data.freezeAll(command.with_name, context); + } + break; + + default: + IStorage::partition(query, commands, context); // should throw an exception. + } + } +} + +void StorageMergeTree::dropPartition(const ASTPtr & partition, bool detach, const Context & context) { { /// Asks to complete merges and does not allow them to start. @@ -806,6 +863,8 @@ void StorageMergeTree::dropPartition(const ASTPtr & /*query*/, const ASTPtr & pa void StorageMergeTree::attachPartition(const ASTPtr & partition, bool part, const Context & context) { + // TODO: should get some locks to prevent race with 'alter … modify column' + String partition_id; if (part) @@ -858,11 +917,6 @@ void StorageMergeTree::attachPartition(const ASTPtr & partition, bool part, cons context.dropCaches(); } -void StorageMergeTree::freezePartition(const ASTPtr & partition, const String & with_name, const Context & context) -{ - data.freezePartition(partition, with_name, context); -} - void StorageMergeTree::replacePartitionFrom(const StoragePtr & source_table, const ASTPtr & partition, bool replace, const Context & context) { auto lock1 = lockStructure(false, __PRETTY_FUNCTION__); @@ -938,4 +992,34 @@ ActionLock StorageMergeTree::getActionLock(StorageActionBlockType action_type) return {}; } +Names StorageMergeTree::getSamplingExpressionNames() const +{ + NameOrderedSet names; + const auto & expr = data.sampling_expression; + if (expr) + expr->collectIdentifierNames(names); + + return Names(names.begin(), names.end()); +} + +Names StorageMergeTree::getPrimaryExpressionNames() const +{ + return data.getPrimarySortColumns(); +} + +Names StorageMergeTree::getPartitionExpressionNames() const +{ + NameOrderedSet names; + const auto & expr = data.partition_expr_ast; + if (expr) + expr->collectIdentifierNames(names); + + return Names(names.cbegin(), names.cend()); +} + +Names StorageMergeTree::getOrderExpressionNames() const +{ + return data.getSortColumns(); +} + } diff --git a/dbms/src/Storages/StorageMergeTree.h b/dbms/src/Storages/StorageMergeTree.h index 0182f31dc8..1dfbb617ec 100644 --- a/dbms/src/Storages/StorageMergeTree.h +++ b/dbms/src/Storages/StorageMergeTree.h @@ -2,6 +2,7 @@ #include +#include #include #include #include @@ -42,15 +43,8 @@ public: const ColumnsDescription & getColumns() const override { return data.getColumns(); } void setColumns(ColumnsDescription columns_) override { return data.setColumns(std::move(columns_)); } - NameAndTypePair getColumn(const String & column_name) const override - { - return data.getColumn(column_name); - } - - bool hasColumn(const String & column_name) const override - { - return data.hasColumn(column_name); - } + NameAndTypePair getColumn(const String & column_name) const override { return data.getColumn(column_name); } + bool hasColumn(const String & column_name) const override { return data.hasColumn(column_name); } BlockInputStreams read( const 
Names & column_names, @@ -66,11 +60,7 @@ public: */ bool optimize(const ASTPtr & query, const ASTPtr & partition, bool final, bool deduplicate, const Context & context) override; - void dropPartition(const ASTPtr & query, const ASTPtr & partition, bool detach, const Context & context) override; - void clearColumnInPartition(const ASTPtr & partition, const Field & column_name, const Context & context) override; - void attachPartition(const ASTPtr & partition, bool part, const Context & context) override; - void replacePartitionFrom(const StoragePtr & source_table, const ASTPtr & partition, bool replace, const Context & context) override; - void freezePartition(const ASTPtr & partition, const String & with_name, const Context & context) override; + void partition(const ASTPtr & query, const PartitionCommands & commands, const Context & context) override; void mutate(const MutationCommands & commands, const Context & context) override; @@ -98,6 +88,17 @@ public: ASTPtr getPrimaryExpression() const override { return data.primary_expr_ast; } + ASTPtr getPartitionExpression() const override { return data.partition_expr_ast; } + + ASTPtr getOrderExpression() const override { return data.secondary_sort_expr_ast; } + + Names getSamplingExpressionNames() const override; + + Names getPrimaryExpressionNames() const override; + + Names getPartitionExpressionNames() const override; + + Names getOrderExpressionNames() const override; private: String path; String database_name; @@ -148,6 +149,12 @@ private: void clearOldMutations(); + // Partition helpers + void dropPartition(const ASTPtr & partition, bool detach, const Context & context); + void clearColumnInPartition(const ASTPtr & partition, const Field & column_name, const Context & context); + void attachPartition(const ASTPtr & partition, bool part, const Context & context); + void replacePartitionFrom(const StoragePtr & source_table, const ASTPtr & partition, bool replace, const Context & context); + friend class MergeTreeBlockOutputStream; friend class MergeTreeData; friend struct CurrentlyMergingPartsTagger; diff --git a/dbms/src/Storages/StorageMySQL.cpp b/dbms/src/Storages/StorageMySQL.cpp index 609c53486e..0d25e82dcf 100644 --- a/dbms/src/Storages/StorageMySQL.cpp +++ b/dbms/src/Storages/StorageMySQL.cpp @@ -100,7 +100,7 @@ public: } trans.commit(); } - catch(...) + catch (...) { trans.rollback(); throw; diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index 1d29a6dba8..022b630277 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -1014,7 +1014,7 @@ bool StorageReplicatedMergeTree::executeLogEntry(LogEntry & entry) } else { - throw Exception("Unexpected log entry type: " + toString(static_cast(entry.type))); + throw Exception("Unexpected log entry type: " + toString(static_cast(entry.type)), ErrorCodes::LOGICAL_ERROR); } if (do_fetch) @@ -1862,7 +1862,7 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) if (!prev.found_new_part_info.isDisjoint(curr.found_new_part_info)) { throw Exception("Intersected final parts detected: " + prev.found_new_part_name - + " and " + curr.found_new_part_name + ". It should be investigated."); + + " and " + curr.found_new_part_name + ". It should be investigated.", ErrorCodes::INCORRECT_DATA); } } } @@ -2943,7 +2943,7 @@ StorageReplicatedMergeTree::~StorageReplicatedMergeTree() { shutdown(); } - catch(...) + catch (...) 
{ tryLogCurrentException(__PRETTY_FUNCTION__); } @@ -3282,6 +3282,58 @@ void StorageReplicatedMergeTree::alter(const AlterCommands & params, LOG_DEBUG(log, "ALTER finished"); } +void StorageReplicatedMergeTree::partition(const ASTPtr & query, const PartitionCommands & commands, const Context & context) +{ + for (const PartitionCommand & command : commands) + { + switch (command.type) + { + case PartitionCommand::DROP_PARTITION: + checkPartitionCanBeDropped(command.partition); + dropPartition(query, command.partition, command.detach, context); + break; + + case PartitionCommand::ATTACH_PARTITION: + attachPartition(command.partition, command.part, context); + break; + + case PartitionCommand::REPLACE_PARTITION: + { + checkPartitionCanBeDropped(command.partition); + String from_database = command.from_database.empty() ? context.getCurrentDatabase() : command.from_database; + auto from_storage = context.getTable(from_database, command.from_table); + replacePartitionFrom(from_storage, command.partition, command.replace, context); + } + break; + + case PartitionCommand::FETCH_PARTITION: + fetchPartition(command.partition, command.from_zookeeper_path, context); + break; + + case PartitionCommand::FREEZE_PARTITION: + { + auto lock = lockStructure(false, __PRETTY_FUNCTION__); + data.freezePartition(command.partition, command.with_name, context); + } + break; + + case PartitionCommand::CLEAR_COLUMN: + clearColumnInPartition(command.partition, command.column_name, context); + break; + + case PartitionCommand::FREEZE_ALL_PARTITIONS: + { + auto lock = lockStructure(false, __PRETTY_FUNCTION__); + data.freezeAll(command.with_name, context); + } + break; + + default: + IStorage::partition(query, commands, context); // should throw an exception. + } + } +} + /// If new version returns ordinary name, else returns part name containing the first and last month of the month static String getPartNamePossiblyFake(MergeTreeDataFormatVersion format_version, const MergeTreePartInfo & part_info) @@ -3380,6 +3432,8 @@ void StorageReplicatedMergeTree::dropPartition(const ASTPtr & query, const ASTPt if (!is_leader) { + // TODO: we can manually reconstruct the query from outside the |dropPartition()| and remove the |query| argument from interface. + // It's the only place where we need this argument. sendRequestToLeaderReplica(query, context.getSettingsRef()); return; } @@ -3427,6 +3481,8 @@ void StorageReplicatedMergeTree::truncate(const ASTPtr & query) void StorageReplicatedMergeTree::attachPartition(const ASTPtr & partition, bool attach_part, const Context & context) { + // TODO: should get some locks to prevent race with 'alter … modify column' + assertNotReadonly(); String partition_id; @@ -4154,12 +4210,6 @@ void StorageReplicatedMergeTree::fetchPartition(const ASTPtr & partition, const } -void StorageReplicatedMergeTree::freezePartition(const ASTPtr & partition, const String & with_name, const Context & context) -{ - data.freezePartition(partition, with_name, context); -} - - void StorageReplicatedMergeTree::mutate(const MutationCommands & commands, const Context &) { /// Overview of the mutation algorithm. 
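As with StorageMergeTree earlier, StorageReplicatedMergeTree::partition() above funnels every ALTER ... PARTITION operation through a single virtual and dispatches on PartitionCommand::Type, with the default case delegating to IStorage::partition(), which throws NOT_IMPLEMENTED. A reduced sketch of the pattern (struct and field names here are simplified placeholders, not ClickHouse types):

    #include <stdexcept>
    #include <string>
    #include <vector>

    struct PartitionCommand
    {
        enum Type { ATTACH_PARTITION, CLEAR_COLUMN, DROP_PARTITION, FETCH_PARTITION,
                    FREEZE_ALL_PARTITIONS, FREEZE_PARTITION, REPLACE_PARTITION };
        Type type;
        std::string partition;  // simplified: the real command carries an AST and more fields
        bool detach = false;
    };

    struct Storage
    {
        virtual ~Storage() = default;

        // Base class: partition operations are unsupported unless a storage overrides this.
        virtual void partition(const std::vector<PartitionCommand> & commands)
        {
            (void)commands;
            throw std::runtime_error("Partition operations are not supported by this storage");
        }
    };

    struct MergeTreeLikeStorage : Storage
    {
        void partition(const std::vector<PartitionCommand> & commands) override
        {
            for (const auto & command : commands)
            {
                switch (command.type)
                {
                    case PartitionCommand::DROP_PARTITION:
                        dropPartition(command.partition, command.detach);
                        break;
                    case PartitionCommand::FREEZE_ALL_PARTITIONS:
                        freezeAll();
                        break;
                    default:
                        Storage::partition(commands);  // unknown command: let the base class throw
                }
            }
        }

        void dropPartition(const std::string &, bool) {}
        void freezeAll() {}
    };
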
@@ -4856,4 +4906,34 @@ bool StorageReplicatedMergeTree::dropPartsInPartition( return true; } +Names StorageReplicatedMergeTree::getSamplingExpressionNames() const +{ + NameOrderedSet names; + const auto & expr = data.sampling_expression; + if (expr) + expr->collectIdentifierNames(names); + + return Names(names.begin(), names.end()); +} + +Names StorageReplicatedMergeTree::getPrimaryExpressionNames() const +{ + return data.getPrimarySortColumns(); +} + +Names StorageReplicatedMergeTree::getOrderExpressionNames() const +{ + return data.getSortColumns(); +} + +Names StorageReplicatedMergeTree::getPartitionExpressionNames() const +{ + NameOrderedSet names; + const auto & expr = data.partition_expr_ast; + if (expr) + expr->collectIdentifierNames(names); + + return Names(names.cbegin(), names.cend()); +} + } diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.h b/dbms/src/Storages/StorageReplicatedMergeTree.h index 491f30d93e..9bc6d4f08f 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.h +++ b/dbms/src/Storages/StorageReplicatedMergeTree.h @@ -116,12 +116,7 @@ public: void alter(const AlterCommands & params, const String & database_name, const String & table_name, const Context & context) override; - void clearColumnInPartition(const ASTPtr & partition, const Field & column_name, const Context & context) override; - void dropPartition(const ASTPtr & query, const ASTPtr & partition, bool detach, const Context & context) override; - void attachPartition(const ASTPtr & partition, bool part, const Context & context) override; - void replacePartitionFrom(const StoragePtr & source_table, const ASTPtr & partition, bool replace, const Context & context) override; - void fetchPartition(const ASTPtr & partition, const String & from, const Context & context) override; - void freezePartition(const ASTPtr & partition, const String & with_name, const Context & context) override; + void partition(const ASTPtr & query, const PartitionCommands & commands, const Context & context) override; void mutate(const MutationCommands & commands, const Context & context) override; @@ -197,6 +192,14 @@ public: ASTPtr getPrimaryExpression() const override { return data.primary_expr_ast; } + Names getSamplingExpressionNames() const override; + + Names getPrimaryExpressionNames() const override; + + Names getOrderExpressionNames() const override; + + Names getPartitionExpressionNames() const override; + private: /// Delete old parts from disk and from ZooKeeper. void clearOldPartsAndRemoveFromZK(); @@ -512,6 +515,13 @@ private: bool dropPartsInPartition(zkutil::ZooKeeper & zookeeper, String & partition_id, StorageReplicatedMergeTree::LogEntry & entry, bool detach); + // Partition helpers + void clearColumnInPartition(const ASTPtr & partition, const Field & column_name, const Context & context); + void dropPartition(const ASTPtr & query, const ASTPtr & partition, bool detach, const Context & context); + void attachPartition(const ASTPtr & partition, bool part, const Context & context); + void replacePartitionFrom(const StoragePtr & source_table, const ASTPtr & partition, bool replace, const Context & context); + void fetchPartition(const ASTPtr & partition, const String & from, const Context & context); + protected: /** If not 'attach', either creates a new table in ZK, or adds a replica to an existing table. 
*/ diff --git a/dbms/src/Storages/StorageXDBC.cpp b/dbms/src/Storages/StorageXDBC.cpp index 40ce763f07..ac8f156305 100644 --- a/dbms/src/Storages/StorageXDBC.cpp +++ b/dbms/src/Storages/StorageXDBC.cpp @@ -113,7 +113,7 @@ namespace for (size_t i = 0; i < 3; ++i) engine_args[i] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[i], args.local_context); - BridgeHelperPtr bridge_helper = std::make_shared>(args.context.getConfigRef(), + BridgeHelperPtr bridge_helper = std::make_shared>(args.context, args.context.getSettingsRef().http_receive_timeout.value, static_cast(*engine_args[0]).value.safeGet()); return std::make_shared(args.table_name, diff --git a/dbms/src/Storages/System/CMakeLists.txt b/dbms/src/Storages/System/CMakeLists.txt index b4783ffa31..418c25de5f 100644 --- a/dbms/src/Storages/System/CMakeLists.txt +++ b/dbms/src/Storages/System/CMakeLists.txt @@ -12,5 +12,4 @@ include(${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake) add_headers_and_sources(storages_system .) list (APPEND storages_system_sources ${CONFIG_BUILD}) add_library(clickhouse_storages_system ${LINK_MODE} ${storages_system_headers} ${storages_system_sources}) -target_link_libraries(clickhouse_storages_system dbms) -target_include_directories(clickhouse_storages_system PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) +target_link_libraries(clickhouse_storages_system PRIVATE dbms common string_utils clickhouse_common_zookeeper) diff --git a/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in b/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in index 57c418d7f9..5c72545ab4 100644 --- a/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in +++ b/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in @@ -6,6 +6,7 @@ const char * auto_config_build[] "VERSION_DESCRIBE", "@VERSION_DESCRIBE@", "VERSION_GITHASH", "@VERSION_GITHASH@", "VERSION_REVISION", "@VERSION_REVISION@", + "VERSION_INTEGER", "@VERSION_INTEGER@", "BUILD_DATE", "@BUILD_DATE@", "BUILD_TYPE", "@CMAKE_BUILD_TYPE@", "SYSTEM", "@CMAKE_SYSTEM@", @@ -38,6 +39,7 @@ const char * auto_config_build[] "USE_POCO_DATAODBC", "@USE_POCO_DATAODBC@", "USE_POCO_MONGODB", "@USE_POCO_MONGODB@", "USE_POCO_NETSSL", "@USE_POCO_NETSSL@", + "USE_BASE64", "@USE_BASE64@", nullptr, nullptr }; diff --git a/dbms/src/Storages/System/StorageSystemColumns.cpp b/dbms/src/Storages/System/StorageSystemColumns.cpp index dbb80c62c6..91c514dba4 100644 --- a/dbms/src/Storages/System/StorageSystemColumns.cpp +++ b/dbms/src/Storages/System/StorageSystemColumns.cpp @@ -36,6 +36,11 @@ StorageSystemColumns::StorageSystemColumns(const std::string & name_) { "data_compressed_bytes", std::make_shared() }, { "data_uncompressed_bytes", std::make_shared() }, { "marks_bytes", std::make_shared() }, + { "comment", std::make_shared() }, + { "is_in_primary_key", std::make_shared() }, + { "is_in_order_key", std::make_shared() }, + { "is_in_partition_key", std::make_shared() }, + { "is_in_sample_key", std::make_shared() }, })); } @@ -80,6 +85,11 @@ protected: NamesAndTypesList columns; ColumnDefaults column_defaults; + ColumnComments column_comments; + Names partition_key_names; + Names order_key_names; + Names primary_key_names; + Names sampling_key_names; MergeTreeData::ColumnSizeByName column_sizes; { @@ -105,6 +115,12 @@ protected: columns = storage->getColumns().getAll(); column_defaults = storage->getColumns().defaults; + column_comments = storage->getColumns().comments; + + partition_key_names = storage->getPartitionExpressionNames(); + 
order_key_names = storage->getOrderExpressionNames(); + primary_key_names = storage->getPrimaryExpressionNames(); + sampling_key_names = storage->getSamplingExpressionNames(); /** Info about sizes of columns for tables of MergeTree family. * NOTE: It is possible to add getter for this info to IStorage interface. @@ -173,6 +189,36 @@ protected: } } + { + const auto it = column_comments.find(column.name); + if (it == std::end(column_comments)) + { + if (columns_mask[src_index++]) + res_columns[res_index++]->insertDefault(); + } + else + { + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(it->second); + } + } + + { + auto find_in_vector = [&key = column.name](const Names& names) + { + return std::find(names.cbegin(), names.cend(), key) != names.end(); + }; + + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(find_in_vector(primary_key_names)); + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(find_in_vector(order_key_names)); + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(find_in_vector(partition_key_names)); + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(find_in_vector(sampling_key_names)); + } + ++rows_count; } } diff --git a/dbms/src/Storages/System/StorageSystemPartsBase.cpp b/dbms/src/Storages/System/StorageSystemPartsBase.cpp index 8cbf497e43..9d7b274572 100644 --- a/dbms/src/Storages/System/StorageSystemPartsBase.cpp +++ b/dbms/src/Storages/System/StorageSystemPartsBase.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -306,7 +307,7 @@ StorageSystemPartsBase::StorageSystemPartsBase(std::string name_, NamesAndTypesL add_alias("bytes", "bytes_on_disk"); add_alias("marks_size", "marks_bytes"); - setColumns(ColumnsDescription(std::move(columns_), {}, std::move(aliases), std::move(defaults))); + setColumns(ColumnsDescription(std::move(columns_), {}, std::move(aliases), std::move(defaults), ColumnComments{})); } } diff --git a/dbms/src/Storages/System/StorageSystemTables.cpp b/dbms/src/Storages/System/StorageSystemTables.cpp index 8ba44b1d4a..1f2c24e1fb 100644 --- a/dbms/src/Storages/System/StorageSystemTables.cpp +++ b/dbms/src/Storages/System/StorageSystemTables.cpp @@ -39,7 +39,11 @@ StorageSystemTables::StorageSystemTables(const std::string & name_) {"dependencies_database", std::make_shared(std::make_shared())}, {"dependencies_table", std::make_shared(std::make_shared())}, {"create_table_query", std::make_shared()}, - {"engine_full", std::make_shared()} + {"engine_full", std::make_shared()}, + {"primary_key", std::make_shared()}, + {"order_key", std::make_shared()}, + {"partition_key", std::make_shared()}, + {"sample_key", std::make_shared()}, })); } @@ -65,7 +69,7 @@ public: size_t max_block_size, ColumnPtr databases, const Context & context) - : columns_mask(columns_mask), header(header), max_block_size(max_block_size), databases(std::move(databases)), context(context) {} + : columns_mask(std::move(columns_mask)), header(std::move(header)), max_block_size(max_block_size), databases(std::move(databases)), context(context) {} String getName() const override { return "Tables"; } Block getHeader() const override { return header; } @@ -144,6 +148,18 @@ protected: if (columns_mask[src_index++]) res_columns[res_index++]->insert(table.second->getName()); + + if (columns_mask[src_index++]) + res_columns[res_index++]->insertDefault(); + + if (columns_mask[src_index++]) + res_columns[res_index++]->insertDefault(); + + if (columns_mask[src_index++]) + res_columns[res_index++]->insertDefault(); 
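The system.columns additions above fill the new is_in_primary_key / is_in_order_key / is_in_partition_key / is_in_sample_key flags by simply searching the column name in the key-name lists the storage now exposes. A reduced sketch of that lookup, with Names replaced by std::vector<std::string> (the classifyColumn helper is illustrative only):

    #include <algorithm>
    #include <string>
    #include <vector>

    using Names = std::vector<std::string>;

    struct KeyMembership
    {
        bool in_primary_key = false;
        bool in_order_key = false;
        bool in_partition_key = false;
        bool in_sample_key = false;
    };

    // Mirrors the find_in_vector lambda from StorageSystemColumns, one flag per key kind.
    KeyMembership classifyColumn(
        const std::string & column,
        const Names & primary, const Names & order, const Names & partition, const Names & sampling)
    {
        auto contains = [&column](const Names & names)
        {
            return std::find(names.cbegin(), names.cend(), column) != names.cend();
        };
        return {contains(primary), contains(order), contains(partition), contains(sampling)};
    }
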
+ + if (columns_mask[src_index++]) + res_columns[res_index++]->insertDefault(); } } @@ -173,7 +189,7 @@ protected: res_columns[res_index++]->insert(tables_it->table()->getName()); if (columns_mask[src_index++]) - res_columns[res_index++]->insert(0u); + res_columns[res_index++]->insert(0u); // is_temporary if (columns_mask[src_index++]) res_columns[res_index++]->insert(tables_it->table()->getDataPath()); @@ -234,6 +250,42 @@ protected: res_columns[res_index++]->insert(engine_full); } } + else + src_index += 2; + + const auto table_it = context.getTable(database_name, table_name); + ASTPtr expression_ptr; + if (columns_mask[src_index++]) + { + if ((expression_ptr = table_it->getPrimaryExpression())) + res_columns[res_index++]->insert(queryToString(expression_ptr)); + else + res_columns[res_index++]->insertDefault(); + } + + if (columns_mask[src_index++]) + { + if ((expression_ptr = table_it->getOrderExpression())) + res_columns[res_index++]->insert(queryToString(expression_ptr)); + else + res_columns[res_index++]->insertDefault(); + } + + if (columns_mask[src_index++]) + { + if ((expression_ptr = table_it->getPartitionExpression())) + res_columns[res_index++]->insert(queryToString(expression_ptr)); + else + res_columns[res_index++]->insertDefault(); + } + + if (columns_mask[src_index++]) + { + if ((expression_ptr = table_it->getSamplingExpression())) + res_columns[res_index++]->insert(queryToString(expression_ptr)); + else + res_columns[res_index++]->insertDefault(); + } } } diff --git a/dbms/src/Storages/System/StorageSystemZooKeeper.cpp b/dbms/src/Storages/System/StorageSystemZooKeeper.cpp index db498d620d..dd7eb03351 100644 --- a/dbms/src/Storages/System/StorageSystemZooKeeper.cpp +++ b/dbms/src/Storages/System/StorageSystemZooKeeper.cpp @@ -15,6 +15,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + NamesAndTypesList StorageSystemZooKeeper::getNamesAndTypes() { @@ -103,7 +108,7 @@ void StorageSystemZooKeeper::fillData(MutableColumns & res_columns, const Contex { String path = extractPath(query_info.query); if (path.empty()) - throw Exception("SELECT from system.zookeeper table must contain condition like path = 'path' in WHERE clause."); + throw Exception("SELECT from system.zookeeper table must contain condition like path = 'path' in WHERE clause.", ErrorCodes::BAD_ARGUMENTS); zkutil::ZooKeeperPtr zookeeper = context.getZooKeeper(); diff --git a/dbms/src/Storages/tests/CMakeLists.txt b/dbms/src/Storages/tests/CMakeLists.txt index 63ff90d305..2942ecfe6b 100644 --- a/dbms/src/Storages/tests/CMakeLists.txt +++ b/dbms/src/Storages/tests/CMakeLists.txt @@ -1,29 +1,29 @@ add_executable (system_numbers system_numbers.cpp) -target_link_libraries (system_numbers dbms clickhouse_storages_system) +target_link_libraries (system_numbers PRIVATE dbms clickhouse_storages_system clickhouse_common_io) add_executable (storage_log storage_log.cpp) -target_link_libraries (storage_log dbms) +target_link_libraries (storage_log PRIVATE dbms) add_executable (seek_speed_test seek_speed_test.cpp) -target_link_libraries (seek_speed_test dbms) +target_link_libraries (seek_speed_test PRIVATE dbms) add_executable (part_checker part_checker.cpp) -target_link_libraries (part_checker dbms) +target_link_libraries (part_checker PRIVATE dbms) add_executable (part_name part_name.cpp) -target_link_libraries (part_name dbms) +target_link_libraries (part_name PRIVATE dbms) add_executable (remove_symlink_directory remove_symlink_directory.cpp) -target_link_libraries (remove_symlink_directory 
dbms) +target_link_libraries (remove_symlink_directory PRIVATE dbms) add_executable (merge_selector merge_selector.cpp) -target_link_libraries (merge_selector dbms) +target_link_libraries (merge_selector PRIVATE dbms) add_executable (merge_selector2 merge_selector2.cpp) -target_link_libraries (merge_selector2 dbms) +target_link_libraries (merge_selector2 PRIVATE dbms) add_executable (get_current_inserts_in_replicated get_current_inserts_in_replicated.cpp) -target_link_libraries (get_current_inserts_in_replicated dbms) +target_link_libraries (get_current_inserts_in_replicated PRIVATE dbms clickhouse_common_config clickhouse_common_zookeeper string_utils) add_executable (get_abandonable_lock_in_all_partitions get_abandonable_lock_in_all_partitions.cpp) -target_link_libraries (get_abandonable_lock_in_all_partitions dbms) +target_link_libraries (get_abandonable_lock_in_all_partitions PRIVATE dbms clickhouse_common_config clickhouse_common_zookeeper) diff --git a/dbms/src/Storages/tests/remove_symlink_directory.cpp b/dbms/src/Storages/tests/remove_symlink_directory.cpp index 59011c557e..8098ee5dc3 100644 --- a/dbms/src/Storages/tests/remove_symlink_directory.cpp +++ b/dbms/src/Storages/tests/remove_symlink_directory.cpp @@ -5,6 +5,14 @@ #include +namespace DB +{ + namespace ErrorCodes + { + extern const int SYSTEM_ERROR; + } +} + int main(int, char **) try { @@ -14,7 +22,7 @@ try Poco::File("./test_dir/file").createFile(); if (0 != symlink("./test_dir", "./test_link")) - DB::throwFromErrno("Cannot create symlink"); + DB::throwFromErrno("Cannot create symlink", DB::ErrorCodes::SYSTEM_ERROR); Poco::File link("./test_link"); link.renameTo("./test_link2"); diff --git a/dbms/src/TableFunctions/CMakeLists.txt b/dbms/src/TableFunctions/CMakeLists.txt index 3eea07fd8e..0ab5acec19 100644 --- a/dbms/src/TableFunctions/CMakeLists.txt +++ b/dbms/src/TableFunctions/CMakeLists.txt @@ -5,4 +5,4 @@ list(REMOVE_ITEM clickhouse_table_functions_sources ITableFunction.cpp TableFunc list(REMOVE_ITEM clickhouse_table_functions_headers ITableFunction.h TableFunctionFactory.h) add_library(clickhouse_table_functions ${LINK_MODE} ${clickhouse_table_functions_sources}) -target_link_libraries(clickhouse_table_functions clickhouse_storages_system dbms ${Poco_Foundation_LIBRARY}) +target_link_libraries(clickhouse_table_functions PRIVATE clickhouse_storages_system dbms ${Poco_Foundation_LIBRARY}) diff --git a/dbms/src/TableFunctions/ITableFunctionXDBC.cpp b/dbms/src/TableFunctions/ITableFunctionXDBC.cpp index c09d346308..455512310d 100644 --- a/dbms/src/TableFunctions/ITableFunctionXDBC.cpp +++ b/dbms/src/TableFunctions/ITableFunctionXDBC.cpp @@ -59,10 +59,9 @@ StoragePtr ITableFunctionXDBC::executeImpl(const ASTPtr & ast_function, const Co table_name = static_cast(*args[1]).value.safeGet(); } - const auto & config = context.getConfigRef(); - /* Infer external table structure */ - BridgeHelperPtr helper = createBridgeHelper(config, context.getSettingsRef().http_receive_timeout.value, connection_string); + /// Have to const_cast, because bridges store their commands inside context + BridgeHelperPtr helper = createBridgeHelper(const_cast(context), context.getSettingsRef().http_receive_timeout.value, connection_string); helper->startBridgeSync(); Poco::URI columns_info_uri = helper->getColumnsInfoURI(); @@ -95,4 +94,4 @@ void registerTableFunctionODBC(TableFunctionFactory & factory) { factory.registerFunction(); } -} \ No newline at end of file +} diff --git a/dbms/src/TableFunctions/ITableFunctionXDBC.h 
b/dbms/src/TableFunctions/ITableFunctionXDBC.h index 3a753457f5..8676b85deb 100644 --- a/dbms/src/TableFunctions/ITableFunctionXDBC.h +++ b/dbms/src/TableFunctions/ITableFunctionXDBC.h @@ -18,7 +18,7 @@ private: StoragePtr executeImpl(const ASTPtr & ast_function, const Context & context) const override; /* A factory method to create bridge helper, that will assist in remote interaction */ - virtual BridgeHelperPtr createBridgeHelper(const Poco::Util::AbstractConfiguration & config_, + virtual BridgeHelperPtr createBridgeHelper(Context & context, const Poco::Timespan & http_timeout_, const std::string & connection_string_) const = 0; }; @@ -33,11 +33,11 @@ public: } private: - BridgeHelperPtr createBridgeHelper(const Poco::Util::AbstractConfiguration & config_, + BridgeHelperPtr createBridgeHelper(Context & context, const Poco::Timespan & http_timeout_, const std::string & connection_string_) const override { - return std::make_shared>(config_, http_timeout_, connection_string_); + return std::make_shared>(context, http_timeout_, connection_string_); } }; @@ -51,11 +51,11 @@ public: } private: - BridgeHelperPtr createBridgeHelper(const Poco::Util::AbstractConfiguration & config_, + BridgeHelperPtr createBridgeHelper(Context & context, const Poco::Timespan & http_timeout_, const std::string & connection_string_) const override { - return std::make_shared>(config_, http_timeout_, connection_string_); + return std::make_shared>(context, http_timeout_, connection_string_); } }; } diff --git a/dbms/tests/clickhouse-test b/dbms/tests/clickhouse-test index fc70d3f649..626881aafb 100755 --- a/dbms/tests/clickhouse-test +++ b/dbms/tests/clickhouse-test @@ -277,7 +277,7 @@ def main(args): result_is_different = subprocess.call(['cmp', '-s', reference_file, stdout_file], stdout = PIPE) if result_is_different: - (diff, _) = Popen(['diff', '--side-by-side', reference_file, stdout_file], stdout = PIPE).communicate() + (diff, _) = Popen(['diff', '--unified', reference_file, stdout_file], stdout = PIPE).communicate() diff = unicode(diff, errors='replace', encoding='utf-8') failure = et.Element("failure", attrib = {"message": "result differs with reference"}) diff --git a/dbms/tests/external_dictionaries/generate_and_test.py b/dbms/tests/external_dictionaries/generate_and_test.py index 14b266a1de..2c72d29de9 100755 --- a/dbms/tests/external_dictionaries/generate_and_test.py +++ b/dbms/tests/external_dictionaries/generate_and_test.py @@ -741,8 +741,10 @@ def run_tests(args): keys = [ 'toUInt64(n)', '(n, n)', '(toString(n), n)', 'toUInt64(n)' ] dict_get_query_skeleton = "select dictGet{type}('{name}', '{type}_', {key}) from system.one array join range(8) as n;" + dict_get_notype_query_skeleton = "select dictGet('{name}', '{type}_', {key}) from system.one array join range(8) as n;" dict_has_query_skeleton = "select dictHas('{name}', {key}) from system.one array join range(8) as n;" dict_get_or_default_query_skeleton = "select dictGet{type}OrDefault('{name}', '{type}_', {key}, to{type}({default})) from system.one array join range(8) as n;" + dict_get_notype_or_default_query_skeleton = "select dictGetOrDefault('{name}', '{type}_', {key}, to{type}({default})) from system.one array join range(8) as n;" dict_hierarchy_query_skeleton = "select dictGetHierarchy('{name}' as d, key), dictIsIn(d, key, toUInt64(1)), dictIsIn(d, key, key) from system.one array join range(toUInt64(8)) as key;" # Designed to match 4 rows hit, 4 rows miss pattern of reference file dict_get_query_range_hashed_skeleton = """ @@ -751,6 +753,12 
@@ def run_tests(args): array join range(4) as n cross join (select r from system.one array join array({hit}, {miss}) as r); """ + dict_get_notype_query_range_hashed_skeleton = """ + select dictGet('{name}', '{type}_', {key}, r) + from system.one + array join range(4) as n + cross join (select r from system.one array join array({hit}, {miss}) as r); + """ def test_query(dict, query, reference, name): global failures @@ -877,6 +885,9 @@ def run_tests(args): test_query(name, dict_get_query_range_hashed_skeleton.format(**locals()), type, 'dictGet' + type) + test_query(name, + dict_get_notype_query_range_hashed_skeleton.format(**locals()), + type, 'dictGet' + type) else: # query dictHas is not supported for range_hashed dictionaries @@ -889,9 +900,15 @@ def run_tests(args): test_query(name, dict_get_query_skeleton.format(**locals()), type, 'dictGet' + type) + test_query(name, + dict_get_notype_query_skeleton.format(**locals()), + type, 'dictGet' + type) test_query(name, dict_get_or_default_query_skeleton.format(**locals()), type + 'OrDefault', 'dictGet' + type + 'OrDefault') + test_query(name, + dict_get_notype_or_default_query_skeleton.format(**locals()), + type + 'OrDefault', 'dictGet' + type + 'OrDefault') # query dictGetHierarchy, dictIsIn if has_parent: diff --git a/dbms/tests/integration/.dockerignore b/dbms/tests/integration/.dockerignore new file mode 100644 index 0000000000..7668361806 --- /dev/null +++ b/dbms/tests/integration/.dockerignore @@ -0,0 +1,2 @@ +**/__pycache__ +**/*.pyc diff --git a/dbms/tests/integration/README.md b/dbms/tests/integration/README.md index 822e37e0aa..84db387672 100644 --- a/dbms/tests/integration/README.md +++ b/dbms/tests/integration/README.md @@ -2,7 +2,7 @@ This directory contains tests that involve several ClickHouse instances, custom configs, ZooKeeper, etc. -### Running +### Running natively Prerequisites: * Ubuntu 14.04 (Trusty) or higher. @@ -30,6 +30,61 @@ set the following environment variables: * `CLICKHOUSE_TESTS_CLIENT_BIN_PATH` to choose the client binary. * `CLICKHOUSE_TESTS_BASE_CONFIG_DIR` to choose the directory from which base configs (`config.xml` and `users.xml`) are taken. + +### Running with runner script + +The only requirement is fresh docker with access to the internet. You can check it with: +``` +$ docker run ubuntu:14.04 ping github.com +PING github.com (140.82.118.3) 56(84) bytes of data. +64 bytes from 140.82.118.3: icmp_seq=1 ttl=53 time=40.1 ms +64 bytes from 140.82.118.3: icmp_seq=2 ttl=53 time=40.4 ms +64 bytes from 140.82.118.3: icmp_seq=3 ttl=53 time=40.3 ms +64 bytes from 140.82.118.3: icmp_seq=4 ttl=53 time=40.1 ms + +--- github.com ping statistics --- +4 packets transmitted, 4 received, 0% packet loss, time 19823ms +rtt min/avg/max/mdev = 40.157/40.284/40.463/0.278 ms +``` + +You can run tests via `./runner` script and pass pytest arguments as last arg: +``` +$ ./runner --binary $HOME/ClickHouse/dbms/programs/clickhouse --configs-dir $HOME/ClickHouse/dbms/programs/server/ 'test_odbc_interaction -ss' +Start tests +============================= test session starts ============================== +platform linux2 -- Python 2.7.15rc1, pytest-4.0.0, py-1.7.0, pluggy-0.8.0 +rootdir: /ClickHouse/dbms/tests/integration, inifile: pytest.ini +collected 6 items + +test_odbc_interaction/test.py Removing network clickhouse_default +... + +Killing roottestodbcinteraction_node1_1 ... done +Killing roottestodbcinteraction_mysql1_1 ... done +Killing roottestodbcinteraction_postgres1_1 ... 
done +Removing roottestodbcinteraction_node1_1 ... done +Removing roottestodbcinteraction_mysql1_1 ... done +Removing roottestodbcinteraction_postgres1_1 ... done +Removing network roottestodbcinteraction_default + +==================== 6 passed, 1 warnings in 95.21 seconds ===================== + +``` + +Path to binary and configs maybe specified via env variables: +``` +$ export CLICKHOUSE_TESTS_BASE_CONFIG_DIR=$HOME/ClickHouse/dbms/programs/server/ +$ export CLICKHOUSE_TESTS_SERVER_BIN_PATH=$HOME/ClickHouse/dbms/programs/clickhouse +$ ./runner 'test_odbc_interaction' +Start tests +============================= test session starts ============================== +platform linux2 -- Python 2.7.15rc1, pytest-4.0.0, py-1.7.0, pluggy-0.8.0 +rootdir: /ClickHouse/dbms/tests/integration, inifile: pytest.ini +collected 6 items + +test_odbc_interaction/test.py ...... [100%] +==================== 6 passed, 1 warnings in 96.33 seconds ===================== +``` ### Adding new tests diff --git a/dbms/tests/integration/helpers/cluster.py b/dbms/tests/integration/helpers/cluster.py index e124b15e01..39227369c2 100644 --- a/dbms/tests/integration/helpers/cluster.py +++ b/dbms/tests/integration/helpers/cluster.py @@ -94,7 +94,7 @@ class ClickHouseCluster: cmd += " client" return cmd - def add_instance(self, name, config_dir=None, main_configs=[], user_configs=[], macros={}, with_zookeeper=False, with_mysql=False, with_kafka=False, clickhouse_path_dir=None, with_odbc_drivers=False, with_postgres=False, hostname=None, env_variables={}, image="ubuntu:14.04"): + def add_instance(self, name, config_dir=None, main_configs=[], user_configs=[], macros={}, with_zookeeper=False, with_mysql=False, with_kafka=False, clickhouse_path_dir=None, with_odbc_drivers=False, with_postgres=False, hostname=None, env_variables={}, image="yandex/clickhouse-integration-test", stay_alive=False): """Add an instance to the cluster. name - the name of the instance directory and the value of the 'instance' macro in ClickHouse. 
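For orientation, the `stay_alive` flag added to `add_instance` above switches the container entrypoint to `CLICKHOUSE_STAY_ALIVE_COMMAND` (defined just below), so the container keeps running even after `clickhouse-server` exits; `test_bridge_dies_with_parent` later in this patch relies on that. A minimal usage sketch under those assumptions; the instance name is illustrative:
```
# Hypothetical cluster setup using the new stay_alive flag from this patch.
# With stay_alive=True the entrypoint runs the server as a daemon and then
# "tail -f /dev/null", so a test can kill and restart the server in place.
from helpers.cluster import ClickHouseCluster

cluster = ClickHouseCluster(__file__)
node = cluster.add_instance('node', stay_alive=True)
```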
@@ -113,7 +113,7 @@ class ClickHouseCluster: instance = ClickHouseInstance( self, self.base_dir, name, config_dir, main_configs, user_configs, macros, with_zookeeper, self.zookeeper_config_path, with_mysql, with_kafka, self.base_configs_dir, self.server_bin_path, - clickhouse_path_dir, with_odbc_drivers, hostname=hostname, env_variables=env_variables, image=image) + clickhouse_path_dir, with_odbc_drivers, hostname=hostname, env_variables=env_variables, image=image, stay_alive=stay_alive) self.instances[name] = instance self.base_cmd.extend(['--file', instance.docker_compose_path]) @@ -300,6 +300,10 @@ class ClickHouseCluster: self.pre_zookeeper_commands.append(command) +CLICKHOUSE_START_COMMAND = "clickhouse server --config-file=/etc/clickhouse-server/config.xml --log-file=/var/log/clickhouse-server/clickhouse-server.log --errorlog-file=/var/log/clickhouse-server/clickhouse-server.err.log" + +CLICKHOUSE_STAY_ALIVE_COMMAND = 'bash -c "{} --daemon; tail -f /dev/null"'.format(CLICKHOUSE_START_COMMAND) + DOCKER_COMPOSE_TEMPLATE = ''' version: '2' services: @@ -313,24 +317,18 @@ services: - {db_dir}:/var/lib/clickhouse/ - {logs_dir}:/var/log/clickhouse-server/ {odbc_ini_path} - entrypoint: - - clickhouse - - server - - --config-file=/etc/clickhouse-server/config.xml - - --log-file=/var/log/clickhouse-server/clickhouse-server.log - - --errorlog-file=/var/log/clickhouse-server/clickhouse-server.err.log + entrypoint: {entrypoint_cmd} depends_on: {depends_on} env_file: - {env_file} ''' - class ClickHouseInstance: def __init__( self, cluster, base_path, name, custom_config_dir, custom_main_configs, custom_user_configs, macros, with_zookeeper, zookeeper_config_path, with_mysql, with_kafka, base_configs_dir, server_bin_path, - clickhouse_path_dir, with_odbc_drivers, hostname=None, env_variables={}, image="ubuntu:14.04"): + clickhouse_path_dir, with_odbc_drivers, hostname=None, env_variables={}, image="yandex/clickhouse-integration-test", stay_alive=False): self.name = name self.base_cmd = cluster.base_cmd[:] @@ -366,6 +364,7 @@ class ClickHouseInstance: self.client = None self.default_timeout = 20.0 # 20 sec self.image = image + self.stay_alive = stay_alive # Connects to the instance via clickhouse-client, sends a query (1st argument) and returns the answer def query(self, sql, stdin=None, timeout=None, settings=None, user=None, ignore_error=False): @@ -579,6 +578,11 @@ class ClickHouseInstance: self._create_odbc_config_file() odbc_ini_path = '- ' + self.odbc_ini_path + entrypoint_cmd = CLICKHOUSE_START_COMMAND + + if self.stay_alive: + entrypoint_cmd = CLICKHOUSE_STAY_ALIVE_COMMAND + with open(self.docker_compose_path, 'w') as docker_compose: docker_compose.write(DOCKER_COMPOSE_TEMPLATE.format( image=self.image, @@ -593,6 +597,7 @@ class ClickHouseInstance: depends_on=str(depends_on), env_file=env_file, odbc_ini_path=odbc_ini_path, + entrypoint_cmd=entrypoint_cmd, )) diff --git a/dbms/tests/integration/image/Dockerfile b/dbms/tests/integration/image/Dockerfile new file mode 100644 index 0000000000..4216f8efff --- /dev/null +++ b/dbms/tests/integration/image/Dockerfile @@ -0,0 +1,61 @@ +FROM ubuntu + + +RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes --force-yes \ + ca-certificates \ + bash \ + btrfs-progs \ + e2fsprogs \ + iptables \ + xfsprogs \ + tar \ + pigz \ + wget \ + git \ + iproute2 \ + module-init-tools \ + cgroupfs-mount \ + python-pip \ + tzdata + +ENV TZ=Europe/Moscow +RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone + +RUN pip 
install pytest docker-compose==1.22.0 docker dicttoxml kazoo PyMySQL psycopg2 + +ENV DOCKER_CHANNEL stable +ENV DOCKER_VERSION 18.09.0 + +RUN set -eux; \ + \ +# this "case" statement is generated via "update.sh" + \ + if ! wget -O docker.tgz "https://download.docker.com/linux/static/${DOCKER_CHANNEL}/x86_64/docker-${DOCKER_VERSION}.tgz"; then \ + echo >&2 "error: failed to download 'docker-${DOCKER_VERSION}' from '${DOCKER_CHANNEL}' for '${x86_64}'"; \ + exit 1; \ + fi; \ + \ + tar --extract \ + --file docker.tgz \ + --strip-components 1 \ + --directory /usr/local/bin/ \ + ; \ + rm docker.tgz; \ + \ + dockerd --version; \ + docker --version + +COPY modprobe.sh /usr/local/bin/modprobe +COPY dockerd-entrypoint.sh /usr/local/bin/ + +RUN set -x \ + && addgroup --system dockremap \ + && adduser --system dockremap \ + && adduser dockremap dockremap \ + && echo 'dockremap:165536:65536' >> /etc/subuid \ + && echo 'dockremap:165536:65536' >> /etc/subgid + +VOLUME /var/lib/docker +EXPOSE 2375 +ENTRYPOINT ["dockerd-entrypoint.sh"] +CMD [] \ No newline at end of file diff --git a/dbms/tests/integration/image/dockerd-entrypoint.sh b/dbms/tests/integration/image/dockerd-entrypoint.sh new file mode 100755 index 0000000000..d8bf951102 --- /dev/null +++ b/dbms/tests/integration/image/dockerd-entrypoint.sh @@ -0,0 +1,13 @@ +#!/bin/bash +set -e + +dockerd --host=unix:///var/run/docker.sock --host=tcp://0.0.0.0:2375 &>/var/log/somefile & + +sleep 3 # to actual start + +echo "Start tests" +export CLICKHOUSE_TESTS_SERVER_BIN_PATH=/clickhouse +export CLICKHOUSE_TESTS_CLIENT_BIN_PATH=/clickhouse +export CLICKHOUSE_TESTS_BASE_CONFIG_DIR=/clickhouse-config + +cd /ClickHouse/dbms/tests/integration && pytest $PYTEST_OPTS diff --git a/dbms/tests/integration/image/modprobe.sh b/dbms/tests/integration/image/modprobe.sh new file mode 100755 index 0000000000..b357d893fd --- /dev/null +++ b/dbms/tests/integration/image/modprobe.sh @@ -0,0 +1,20 @@ +#!/bin/sh +set -eu + +# "modprobe" without modprobe +# https://twitter.com/lucabruno/status/902934379835662336 + +# this isn't 100% fool-proof, but it'll have a much higher success rate than simply using the "real" modprobe + +# Docker often uses "modprobe -va foo bar baz" +# so we ignore modules that start with "-" +for module; do + if [ "${module#-}" = "$module" ]; then + ip link show "$module" || true + lsmod | grep "$module" || true + fi +done + +# remove /usr/local/... 
from PATH so we can exec the real modprobe as a last resort +export PATH='/usr/sbin:/usr/bin:/sbin:/bin' +exec modprobe "$@" diff --git a/dbms/tests/integration/runner b/dbms/tests/integration/runner new file mode 100755 index 0000000000..ab1ca8003a --- /dev/null +++ b/dbms/tests/integration/runner @@ -0,0 +1,43 @@ +#!/usr/bin/env python +#-*- coding: utf-8 -*- +import subprocess +import os +import argparse +import logging + +CUR_FILE_DIR_PATH = os.path.dirname(os.path.realpath(__file__)) +DEFAULT_CLICKHOUSE_ROOT = os.path.abspath(os.path.join(CUR_FILE_DIR_PATH, "../../../")) + +DIND_INTEGRATION_TESTS_IMAGE_NAME = "yandex/clickhouse-integration-tests-runner" + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s') + parser = argparse.ArgumentParser(description="ClickHouse integration tests runner") + parser.add_argument( + "--binary", + default=os.environ.get("CLICKHOUSE_TESTS_SERVER_BIN_PATH", os.environ.get("CLICKHOUSE_TESTS_CLIENT_BIN_PATH", "/usr/bin/clickhouse")), + help="Path to clickhouse binary") + parser.add_argument( + "--configs-dir", + default=os.environ.get("CLICKHOUSE_TESTS_BASE_CONFIG_DIR", "/etc/clickhouse-server"), + help="Path to clickhouse configs directory" + ) + parser.add_argument( + "--clickhouse-root", + default=DEFAULT_CLICKHOUSE_ROOT, + help="Path to repository root folder" + ) + parser.add_argument('pytest_args', nargs='*', help="args for pytest command") + + args = parser.parse_args() + + cmd = "docker run --net=host --privileged --volume={bin}:/clickhouse \ + --volume={cfg}:/clickhouse-config --volume={pth}:/ClickHouse -e PYTEST_OPTS='{opts}' {img}".format( + bin=args.binary, + cfg=args.configs_dir, + pth=args.clickhouse_root, + opts=' '.join(args.pytest_args), + img=DIND_INTEGRATION_TESTS_IMAGE_NAME, + ) + + subprocess.check_call(cmd, shell=True) diff --git a/dbms/tests/integration/test_max_http_connections_for_replication/__init__.py b/dbms/tests/integration/test_max_http_connections_for_replication/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/dbms/tests/integration/test_max_http_connections_for_replication/configs/log_conf.xml b/dbms/tests/integration/test_max_http_connections_for_replication/configs/log_conf.xml new file mode 100644 index 0000000000..0de2745ca4 --- /dev/null +++ b/dbms/tests/integration/test_max_http_connections_for_replication/configs/log_conf.xml @@ -0,0 +1,11 @@ + + + trace + /var/log/clickhouse-server/log.log + /var/log/clickhouse-server/log.err.log + 1000M + 10 + /var/log/clickhouse-server/stderr.log + /var/log/clickhouse-server/stdout.log + + diff --git a/dbms/tests/integration/test_max_http_connections_for_replication/configs/remote_servers.xml b/dbms/tests/integration/test_max_http_connections_for_replication/configs/remote_servers.xml new file mode 100644 index 0000000000..e44d4eef3c --- /dev/null +++ b/dbms/tests/integration/test_max_http_connections_for_replication/configs/remote_servers.xml @@ -0,0 +1,40 @@ + + + + + true + + test + node1 + 9000 + + + test + node2 + 9000 + + + + + + true + + test + node3 + 9000 + + + test + node4 + 9000 + + + test + node5 + 9000 + + + + + + diff --git a/dbms/tests/integration/test_max_http_connections_for_replication/test.py b/dbms/tests/integration/test_max_http_connections_for_replication/test.py new file mode 100644 index 0000000000..5fbd7f26e9 --- /dev/null +++ b/dbms/tests/integration/test_max_http_connections_for_replication/test.py @@ -0,0 +1,91 @@ +import time +import pytest + +from helpers.cluster import 
ClickHouseCluster +from multiprocessing.dummy import Pool + +from helpers.test_tools import assert_eq_with_retry + +def _fill_nodes(nodes, shard, connections_count): + for node in nodes: + node.query( + ''' + CREATE DATABASE test; + + CREATE TABLE test_table(date Date, id UInt32, dummy UInt32) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/test{shard}/replicated', '{replica}') + PARTITION BY date + ORDER BY id + SETTINGS + replicated_max_parallel_fetches_for_host={connections}, + index_granularity=8192; + '''.format(shard=shard, replica=node.name, connections=connections_count)) + +cluster = ClickHouseCluster(__file__) +node1 = cluster.add_instance('node1', config_dir="configs", main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'], with_zookeeper=True) +node2 = cluster.add_instance('node2', config_dir="configs", main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'], with_zookeeper=True) + +@pytest.fixture(scope="module") +def start_small_cluster(): + try: + cluster.start() + + _fill_nodes([node1, node2], 1, 1) + + yield cluster + + finally: + cluster.shutdown() + +def test_single_endpoint_connections_count(start_small_cluster): + + def task(count): + print("Inserting ten times from {}".format(count)) + for i in xrange(count, count + 10): + node1.query("insert into test_table values ('2017-06-16', {}, 0)".format(i)) + + p = Pool(10) + p.map(task, xrange(0, 100, 10)) + + assert_eq_with_retry(node1, "select count() from test_table", "100") + assert_eq_with_retry(node2, "select count() from test_table", "100") + + assert node2.query("SELECT value FROM system.events where event='CreatedHTTPConnections'") == '1\n' + +node3 = cluster.add_instance('node3', config_dir="configs", main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'], with_zookeeper=True) +node4 = cluster.add_instance('node4', config_dir="configs", main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'], with_zookeeper=True) +node5 = cluster.add_instance('node5', config_dir="configs", main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'], with_zookeeper=True) + +@pytest.fixture(scope="module") +def start_big_cluster(): + try: + cluster.start() + + _fill_nodes([node3, node4, node5], 2, 2) + + yield cluster + + finally: + cluster.shutdown() + +def test_multiple_endpoint_connections_count(start_big_cluster): + + def task(count): + print("Inserting ten times from {}".format(count)) + if (count / 10) % 2 == 1: + node = node3 + else: + node = node4 + + for i in xrange(count, count + 10): + node.query("insert into test_table values ('2017-06-16', {}, 0)".format(i)) + + p = Pool(10) + p.map(task, xrange(0, 100, 10)) + + assert_eq_with_retry(node3, "select count() from test_table", "100") + assert_eq_with_retry(node4, "select count() from test_table", "100") + assert_eq_with_retry(node5, "select count() from test_table", "100") + + # two per each host + assert node5.query("SELECT value FROM system.events where event='CreatedHTTPConnections'") == '4\n' diff --git a/dbms/tests/integration/test_odbc_interaction/test.py b/dbms/tests/integration/test_odbc_interaction/test.py index 7b82d4a428..79652e6edb 100644 --- a/dbms/tests/integration/test_odbc_interaction/test.py +++ b/dbms/tests/integration/test_odbc_interaction/test.py @@ -10,7 +10,7 @@ from helpers.cluster import ClickHouseCluster SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) cluster = ClickHouseCluster(__file__, base_configs_dir=os.path.join(SCRIPT_DIR, 'configs')) -node1 = cluster.add_instance('node1', 
with_odbc_drivers=True, with_mysql=True, image='alesapin/ubuntu_with_odbc:14.04', main_configs=['configs/dictionaries/sqlite3_odbc_hashed_dictionary.xml', 'configs/dictionaries/sqlite3_odbc_cached_dictionary.xml', 'configs/dictionaries/postgres_odbc_hashed_dictionary.xml']) +node1 = cluster.add_instance('node1', with_odbc_drivers=True, with_mysql=True, image='alesapin/ubuntu_with_odbc', main_configs=['configs/dictionaries/sqlite3_odbc_hashed_dictionary.xml', 'configs/dictionaries/sqlite3_odbc_cached_dictionary.xml', 'configs/dictionaries/postgres_odbc_hashed_dictionary.xml'], stay_alive=True) create_table_sql_template = """ CREATE TABLE `clickhouse`.`{}` ( @@ -176,3 +176,34 @@ def test_postgres_odbc_hached_dictionary_with_schema(started_cluster): time.sleep(5) assert node1.query("select dictGetString('postgres_odbc_hashed', 'column2', toUInt64(1))") == "hello\n" assert node1.query("select dictGetString('postgres_odbc_hashed', 'column2', toUInt64(2))") == "world\n" + +def test_bridge_dies_with_parent(started_cluster): + node1.query("select dictGetString('postgres_odbc_hashed', 'column2', toUInt64(1))") + def get_pid(cmd): + output = node1.exec_in_container(["bash", "-c", "ps ax | grep '{}' | grep -v 'grep' | grep -v 'bash -c' | awk '{{print $1}}'".format(cmd)], privileged=True, user='root') + if output: + try: + pid = int(output.split('\n')[0].strip()) + return pid + except: + return None + return None + + clickhouse_pid = get_pid("clickhouse server") + bridge_pid = get_pid("odbc-bridge") + assert clickhouse_pid is not None + assert bridge_pid is not None + + while clickhouse_pid is not None: + try: + node1.exec_in_container(["bash", "-c", "kill {}".format(clickhouse_pid)], privileged=True, user='root') + except: + pass + clickhouse_pid = get_pid("clickhouse server") + time.sleep(1) + + time.sleep(1) # just for sure, that odbc-bridge caught signal + bridge_pid = get_pid("odbc-bridge") + + assert clickhouse_pid is None + assert bridge_pid is None diff --git a/dbms/tests/performance/test_hits/first_significant_subdomain.xml b/dbms/tests/performance/test_hits/first_significant_subdomain.xml new file mode 100644 index 0000000000..22998f670b --- /dev/null +++ b/dbms/tests/performance/test_hits/first_significant_subdomain.xml @@ -0,0 +1,29 @@ + + first_significant_subdomain + loop + + + + 3 + 10000 + + + 5 + 60000 + + + + + + + + + test.hits + + + + 1 + + + SELECT count() FROM test.hits WHERE NOT ignore(firstSignificantSubdomain(URL)) + diff --git a/dbms/tests/performance/test_hits/test_hits.xml b/dbms/tests/performance/test_hits/test_hits.xml index c9e30227ff..e98c6c8e62 100644 --- a/dbms/tests/performance/test_hits/test_hits.xml +++ b/dbms/tests/performance/test_hits/test_hits.xml @@ -86,8 +86,8 @@ PageCharset тоже почти всегда непуст, но его сред SELECT uniq(UserID) FROM test.hits SETTINGS max_threads = 1 SELECT uniq(UserID) FROM test.hits -SELECT uniqCombined(UserID) FROM test.hits SETTINGS max_threads = 1 -SELECT uniqCombined(UserID) FROM test.hits +SELECT uniqCombined(17)(UserID) FROM test.hits SETTINGS max_threads = 1 +SELECT uniqCombined(17)(UserID) FROM test.hits SELECT uniqExact(UserID) FROM test.hits SETTINGS max_threads = 1 SELECT uniqExact(UserID) FROM test.hits diff --git a/dbms/tests/performance/uniq/uniq.xml b/dbms/tests/performance/uniq/uniq.xml new file mode 100644 index 0000000000..ba9e347ac5 --- /dev/null +++ b/dbms/tests/performance/uniq/uniq.xml @@ -0,0 +1,65 @@ + + uniq + loop + + + hits_100m_single + 30000000000 + + + + + 10000 + + + 5000 + 20000 + + + + + + + + + 
20000000000 + + + + + key + + 1 + SearchEngineID + RegionID + SearchPhrase + ClientIP + + + + func + + sum + uniq + uniqExact + uniqHLL12 + uniqCombined(12) + uniqCombined(13) + uniqCombined(14) + uniqCombined(15) + uniqCombined(16) + uniqCombined(17) + uniqCombined(18) + uniqCombined(19) + uniqCombined(20) + uniqUpTo(3) + uniqUpTo(5) + uniqUpTo(10) + uniqUpTo(25) + uniqUpTo(100) + + + + + SELECT {key} AS k, {func}(UserID) FROM hits_100m_single GROUP BY k + diff --git a/dbms/tests/queries/0_stateless/00030_alter_table.reference b/dbms/tests/queries/0_stateless/00030_alter_table.reference index ac2c0af78f..05022b485f 100644 --- a/dbms/tests/queries/0_stateless/00030_alter_table.reference +++ b/dbms/tests/queries/0_stateless/00030_alter_table.reference @@ -1,27 +1,27 @@ -CounterID UInt32 -StartDate Date -UserID UInt32 -VisitID UInt32 -NestedColumn.A Array(UInt8) -NestedColumn.S Array(String) -ToDrop UInt32 -Added0 UInt32 -Added1 UInt32 -Added2 UInt32 -AddedNested1.A Array(UInt32) -AddedNested1.B Array(UInt64) -AddedNested1.C Array(String) -AddedNested2.A Array(UInt32) -AddedNested2.B Array(UInt64) -CounterID UInt32 -StartDate Date -UserID UInt32 -VisitID UInt32 -Added0 String -Added1 UInt32 -Added2 UInt32 -AddedNested1.A Array(UInt32) -AddedNested1.C Array(String) -AddedNested2.A Array(UInt32) -AddedNested2.B Array(UInt64) +CounterID UInt32 +StartDate Date +UserID UInt32 +VisitID UInt32 +NestedColumn.A Array(UInt8) +NestedColumn.S Array(String) +ToDrop UInt32 +Added0 UInt32 +Added1 UInt32 +Added2 UInt32 +AddedNested1.A Array(UInt32) +AddedNested1.B Array(UInt64) +AddedNested1.C Array(String) +AddedNested2.A Array(UInt32) +AddedNested2.B Array(UInt64) +CounterID UInt32 +StartDate Date +UserID UInt32 +VisitID UInt32 +Added0 String +Added1 UInt32 +Added2 UInt32 +AddedNested1.A Array(UInt32) +AddedNested1.C Array(String) +AddedNested2.A Array(UInt32) +AddedNested2.B Array(UInt64) 1 2014-01-01 2 3 0 0 [] [] [] [] diff --git a/dbms/tests/queries/0_stateless/00061_merge_tree_alter.reference b/dbms/tests/queries/0_stateless/00061_merge_tree_alter.reference index 48a896785f..9545f7b420 100644 --- a/dbms/tests/queries/0_stateless/00061_merge_tree_alter.reference +++ b/dbms/tests/queries/0_stateless/00061_merge_tree_alter.reference @@ -1,56 +1,56 @@ -d Date -k UInt64 -i32 Int32 +d Date +k UInt64 +i32 Int32 CREATE TABLE test.alter ( d Date, k UInt64, i32 Int32) ENGINE = MergeTree(d, k, 8192) 2015-01-01 10 42 -d Date -k UInt64 -i32 Int32 -n.ui8 Array(UInt8) -n.s Array(String) +d Date +k UInt64 +i32 Int32 +n.ui8 Array(UInt8) +n.s Array(String) CREATE TABLE test.alter ( d Date, k UInt64, i32 Int32, `n.ui8` Array(UInt8), `n.s` Array(String)) ENGINE = MergeTree(d, k, 8192) 2015-01-01 8 40 [1,2,3] ['12','13','14'] 2015-01-01 10 42 [] [] -d Date -k UInt64 -i32 Int32 -n.ui8 Array(UInt8) -n.s Array(String) -n.d Array(Date) +d Date +k UInt64 +i32 Int32 +n.ui8 Array(UInt8) +n.s Array(String) +n.d Array(Date) CREATE TABLE test.alter ( d Date, k UInt64, i32 Int32, `n.ui8` Array(UInt8), `n.s` Array(String), `n.d` Array(Date)) ENGINE = MergeTree(d, k, 8192) 2015-01-01 7 39 [10,20,30] ['120','130','140'] ['2000-01-01','2000-01-01','2000-01-03'] 2015-01-01 8 40 [1,2,3] ['12','13','14'] ['0000-00-00','0000-00-00','0000-00-00'] 2015-01-01 10 42 [] [] [] -d Date -k UInt64 -i32 Int32 -n.ui8 Array(UInt8) -n.s Array(String) -n.d Array(Date) -s String DEFAULT \'0\' +d Date +k UInt64 +i32 Int32 +n.ui8 Array(UInt8) +n.s Array(String) +n.d Array(Date) +s String DEFAULT \'0\' CREATE TABLE test.alter ( d Date, k UInt64, i32 Int32, 
`n.ui8` Array(UInt8), `n.s` Array(String), `n.d` Array(Date), s String DEFAULT \'0\') ENGINE = MergeTree(d, k, 8192) 2015-01-01 6 38 [10,20,30] ['asd','qwe','qwe'] ['2000-01-01','2000-01-01','2000-01-03'] 100500 2015-01-01 7 39 [10,20,30] ['120','130','140'] ['2000-01-01','2000-01-01','2000-01-03'] 0 2015-01-01 8 40 [1,2,3] ['12','13','14'] ['0000-00-00','0000-00-00','0000-00-00'] 0 2015-01-01 10 42 [] [] [] 0 -d Date -k UInt64 -i32 Int32 -n.ui8 Array(UInt8) -n.s Array(String) -s Int64 +d Date +k UInt64 +i32 Int32 +n.ui8 Array(UInt8) +n.s Array(String) +s Int64 CREATE TABLE test.alter ( d Date, k UInt64, i32 Int32, `n.ui8` Array(UInt8), `n.s` Array(String), s Int64) ENGINE = MergeTree(d, k, 8192) 2015-01-01 6 38 [10,20,30] ['asd','qwe','qwe'] 100500 2015-01-01 7 39 [10,20,30] ['120','130','140'] 0 2015-01-01 8 40 [1,2,3] ['12','13','14'] 0 2015-01-01 10 42 [] [] 0 -d Date -k UInt64 -i32 Int32 -n.ui8 Array(UInt8) -n.s Array(String) -s UInt32 -n.d Array(Date) +d Date +k UInt64 +i32 Int32 +n.ui8 Array(UInt8) +n.s Array(String) +s UInt32 +n.d Array(Date) CREATE TABLE test.alter ( d Date, k UInt64, i32 Int32, `n.ui8` Array(UInt8), `n.s` Array(String), s UInt32, `n.d` Array(Date)) ENGINE = MergeTree(d, k, 8192) 2015-01-01 6 38 [10,20,30] ['asd','qwe','qwe'] 100500 ['0000-00-00','0000-00-00','0000-00-00'] 2015-01-01 7 39 [10,20,30] ['120','130','140'] 0 ['0000-00-00','0000-00-00','0000-00-00'] @@ -60,40 +60,40 @@ CREATE TABLE test.alter ( d Date, k UInt64, i32 Int32, `n.ui8` Array(UInt8), 2015-01-01 7 39 [10,20,30] ['120','130','140'] 0 ['0000-00-00','0000-00-00','0000-00-00'] 2015-01-01 8 40 [1,2,3] ['12','13','14'] 0 ['0000-00-00','0000-00-00','0000-00-00'] 2015-01-01 10 42 [] [] 0 [] -d Date -k UInt64 -i32 Int32 -n.s Array(String) -s UInt32 +d Date +k UInt64 +i32 Int32 +n.s Array(String) +s UInt32 CREATE TABLE test.alter ( d Date, k UInt64, i32 Int32, `n.s` Array(String), s UInt32) ENGINE = MergeTree(d, k, 8192) 2015-01-01 6 38 ['asd','qwe','qwe'] 100500 2015-01-01 7 39 ['120','130','140'] 0 2015-01-01 8 40 ['12','13','14'] 0 2015-01-01 10 42 [] 0 -d Date -k UInt64 -i32 Int32 -s UInt32 +d Date +k UInt64 +i32 Int32 +s UInt32 CREATE TABLE test.alter ( d Date, k UInt64, i32 Int32, s UInt32) ENGINE = MergeTree(d, k, 8192) 2015-01-01 6 38 100500 2015-01-01 7 39 0 2015-01-01 8 40 0 2015-01-01 10 42 0 -d Date -k UInt64 -i32 Int32 -s UInt32 -n.s Array(String) -n.d Array(Date) +d Date +k UInt64 +i32 Int32 +s UInt32 +n.s Array(String) +n.d Array(Date) CREATE TABLE test.alter ( d Date, k UInt64, i32 Int32, s UInt32, `n.s` Array(String), `n.d` Array(Date)) ENGINE = MergeTree(d, k, 8192) 2015-01-01 6 38 100500 [] [] 2015-01-01 7 39 0 [] [] 2015-01-01 8 40 0 [] [] 2015-01-01 10 42 0 [] [] -d Date -k UInt64 -i32 Int32 -s UInt32 +d Date +k UInt64 +i32 Int32 +s UInt32 CREATE TABLE test.alter ( d Date, k UInt64, i32 Int32, s UInt32) ENGINE = MergeTree(d, k, 8192) 2015-01-01 6 38 100500 2015-01-01 7 39 0 diff --git a/dbms/tests/queries/0_stateless/00062_replicated_merge_tree_alter_zookeeper.reference b/dbms/tests/queries/0_stateless/00062_replicated_merge_tree_alter_zookeeper.reference index d2a95113dd..cd37165696 100644 --- a/dbms/tests/queries/0_stateless/00062_replicated_merge_tree_alter_zookeeper.reference +++ b/dbms/tests/queries/0_stateless/00062_replicated_merge_tree_alter_zookeeper.reference @@ -1,213 +1,213 @@ -d Date -k UInt64 -i32 Int32 +d Date +k UInt64 +i32 Int32 CREATE TABLE test.replicated_alter1 ( d Date, k UInt64, i32 Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', 
d, k, 8192) -d Date -k UInt64 -i32 Int32 +d Date +k UInt64 +i32 Int32 CREATE TABLE test.replicated_alter2 ( d Date, k UInt64, i32 Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) 2015-01-01 10 42 -d Date -k UInt64 -i32 Int32 -dt DateTime +d Date +k UInt64 +i32 Int32 +dt DateTime CREATE TABLE test.replicated_alter1 ( d Date, k UInt64, i32 Int32, dt DateTime) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) -d Date -k UInt64 -i32 Int32 -dt DateTime +d Date +k UInt64 +i32 Int32 +dt DateTime CREATE TABLE test.replicated_alter2 ( d Date, k UInt64, i32 Int32, dt DateTime) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) 2015-01-01 9 41 1992-01-01 08:00:00 2015-01-01 10 42 0000-00-00 00:00:00 -d Date -k UInt64 -i32 Int32 -dt DateTime -n.ui8 Array(UInt8) -n.s Array(String) +d Date +k UInt64 +i32 Int32 +dt DateTime +n.ui8 Array(UInt8) +n.s Array(String) CREATE TABLE test.replicated_alter1 ( d Date, k UInt64, i32 Int32, dt DateTime, `n.ui8` Array(UInt8), `n.s` Array(String)) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) -d Date -k UInt64 -i32 Int32 -dt DateTime -n.ui8 Array(UInt8) -n.s Array(String) +d Date +k UInt64 +i32 Int32 +dt DateTime +n.ui8 Array(UInt8) +n.s Array(String) CREATE TABLE test.replicated_alter2 ( d Date, k UInt64, i32 Int32, dt DateTime, `n.ui8` Array(UInt8), `n.s` Array(String)) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) 2015-01-01 8 40 2012-12-12 12:12:12 [1,2,3] ['12','13','14'] 2015-01-01 9 41 1992-01-01 08:00:00 [] [] 2015-01-01 10 42 0000-00-00 00:00:00 [] [] -d Date -k UInt64 -i32 Int32 -dt DateTime -n.ui8 Array(UInt8) -n.s Array(String) -n.d Array(Date) +d Date +k UInt64 +i32 Int32 +dt DateTime +n.ui8 Array(UInt8) +n.s Array(String) +n.d Array(Date) CREATE TABLE test.replicated_alter1 ( d Date, k UInt64, i32 Int32, dt DateTime, `n.ui8` Array(UInt8), `n.s` Array(String), `n.d` Array(Date)) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) -d Date -k UInt64 -i32 Int32 -dt DateTime -n.ui8 Array(UInt8) -n.s Array(String) -n.d Array(Date) +d Date +k UInt64 +i32 Int32 +dt DateTime +n.ui8 Array(UInt8) +n.s Array(String) +n.d Array(Date) CREATE TABLE test.replicated_alter2 ( d Date, k UInt64, i32 Int32, dt DateTime, `n.ui8` Array(UInt8), `n.s` Array(String), `n.d` Array(Date)) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) 2015-01-01 7 39 2014-07-14 13:26:50 [10,20,30] ['120','130','140'] ['2000-01-01','2000-01-01','2000-01-03'] 2015-01-01 8 40 2012-12-12 12:12:12 [1,2,3] ['12','13','14'] ['0000-00-00','0000-00-00','0000-00-00'] 2015-01-01 9 41 1992-01-01 08:00:00 [] [] [] 2015-01-01 10 42 0000-00-00 00:00:00 [] [] [] -d Date -k UInt64 -i32 Int32 -dt DateTime -n.ui8 Array(UInt8) -n.s Array(String) -n.d Array(Date) -s String DEFAULT \'0\' +d Date +k UInt64 +i32 Int32 +dt DateTime +n.ui8 Array(UInt8) +n.s Array(String) +n.d Array(Date) +s String DEFAULT \'0\' CREATE TABLE test.replicated_alter1 ( d Date, k UInt64, i32 Int32, dt DateTime, `n.ui8` Array(UInt8), `n.s` Array(String), `n.d` Array(Date), s String DEFAULT \'0\') ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) -d Date -k UInt64 -i32 Int32 -dt DateTime -n.ui8 Array(UInt8) -n.s Array(String) -n.d Array(Date) -s String DEFAULT \'0\' +d Date +k UInt64 +i32 Int32 +dt DateTime +n.ui8 Array(UInt8) +n.s Array(String) +n.d Array(Date) +s 
String DEFAULT \'0\' CREATE TABLE test.replicated_alter2 ( d Date, k UInt64, i32 Int32, dt DateTime, `n.ui8` Array(UInt8), `n.s` Array(String), `n.d` Array(Date), s String DEFAULT \'0\') ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) 2015-01-01 6 38 2014-07-15 13:26:50 [10,20,30] ['asd','qwe','qwe'] ['2000-01-01','2000-01-01','2000-01-03'] 100500 2015-01-01 7 39 2014-07-14 13:26:50 [10,20,30] ['120','130','140'] ['2000-01-01','2000-01-01','2000-01-03'] 0 2015-01-01 8 40 2012-12-12 12:12:12 [1,2,3] ['12','13','14'] ['0000-00-00','0000-00-00','0000-00-00'] 0 2015-01-01 9 41 1992-01-01 08:00:00 [] [] [] 0 2015-01-01 10 42 0000-00-00 00:00:00 [] [] [] 0 -d Date -k UInt64 -i32 Int32 -dt DateTime -n.ui8 Array(UInt8) -n.s Array(String) -s Int64 +d Date +k UInt64 +i32 Int32 +dt DateTime +n.ui8 Array(UInt8) +n.s Array(String) +s Int64 CREATE TABLE test.replicated_alter1 ( d Date, k UInt64, i32 Int32, dt DateTime, `n.ui8` Array(UInt8), `n.s` Array(String), s Int64) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) -d Date -k UInt64 -i32 Int32 -dt DateTime -n.ui8 Array(UInt8) -n.s Array(String) -s Int64 +d Date +k UInt64 +i32 Int32 +dt DateTime +n.ui8 Array(UInt8) +n.s Array(String) +s Int64 CREATE TABLE test.replicated_alter2 ( d Date, k UInt64, i32 Int32, dt DateTime, `n.ui8` Array(UInt8), `n.s` Array(String), s Int64) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) 2015-01-01 6 38 2014-07-15 13:26:50 [10,20,30] ['asd','qwe','qwe'] 100500 2015-01-01 7 39 2014-07-14 13:26:50 [10,20,30] ['120','130','140'] 0 2015-01-01 8 40 2012-12-12 12:12:12 [1,2,3] ['12','13','14'] 0 2015-01-01 9 41 1992-01-01 08:00:00 [] [] 0 2015-01-01 10 42 0000-00-00 00:00:00 [] [] 0 -d Date -k UInt64 -i32 Int32 -dt DateTime -n.ui8 Array(UInt8) -n.s Array(String) -s UInt32 -n.d Array(Date) +d Date +k UInt64 +i32 Int32 +dt DateTime +n.ui8 Array(UInt8) +n.s Array(String) +s UInt32 +n.d Array(Date) CREATE TABLE test.replicated_alter1 ( d Date, k UInt64, i32 Int32, dt DateTime, `n.ui8` Array(UInt8), `n.s` Array(String), s UInt32, `n.d` Array(Date)) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) -d Date -k UInt64 -i32 Int32 -dt DateTime -n.ui8 Array(UInt8) -n.s Array(String) -s UInt32 -n.d Array(Date) +d Date +k UInt64 +i32 Int32 +dt DateTime +n.ui8 Array(UInt8) +n.s Array(String) +s UInt32 +n.d Array(Date) CREATE TABLE test.replicated_alter2 ( d Date, k UInt64, i32 Int32, dt DateTime, `n.ui8` Array(UInt8), `n.s` Array(String), s UInt32, `n.d` Array(Date)) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) 2015-01-01 6 38 2014-07-15 13:26:50 [10,20,30] ['asd','qwe','qwe'] 100500 ['0000-00-00','0000-00-00','0000-00-00'] 2015-01-01 7 39 2014-07-14 13:26:50 [10,20,30] ['120','130','140'] 0 ['0000-00-00','0000-00-00','0000-00-00'] 2015-01-01 8 40 2012-12-12 12:12:12 [1,2,3] ['12','13','14'] 0 ['0000-00-00','0000-00-00','0000-00-00'] 2015-01-01 9 41 1992-01-01 08:00:00 [] [] 0 [] 2015-01-01 10 42 0000-00-00 00:00:00 [] [] 0 [] -d Date -k UInt64 -i32 Int32 -dt DateTime -n.s Array(String) -s UInt32 +d Date +k UInt64 +i32 Int32 +dt DateTime +n.s Array(String) +s UInt32 CREATE TABLE test.replicated_alter1 ( d Date, k UInt64, i32 Int32, dt DateTime, `n.s` Array(String), s UInt32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) -d Date -k UInt64 -i32 Int32 -dt DateTime -n.s Array(String) -s UInt32 +d Date +k UInt64 +i32 Int32 +dt 
DateTime +n.s Array(String) +s UInt32 CREATE TABLE test.replicated_alter2 ( d Date, k UInt64, i32 Int32, dt DateTime, `n.s` Array(String), s UInt32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) 2015-01-01 6 38 2014-07-15 13:26:50 ['asd','qwe','qwe'] 100500 2015-01-01 7 39 2014-07-14 13:26:50 ['120','130','140'] 0 2015-01-01 8 40 2012-12-12 12:12:12 ['12','13','14'] 0 2015-01-01 9 41 1992-01-01 08:00:00 [] 0 2015-01-01 10 42 0000-00-00 00:00:00 [] 0 -d Date -k UInt64 -i32 Int32 -dt DateTime -s UInt32 +d Date +k UInt64 +i32 Int32 +dt DateTime +s UInt32 CREATE TABLE test.replicated_alter1 ( d Date, k UInt64, i32 Int32, dt DateTime, s UInt32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) -d Date -k UInt64 -i32 Int32 -dt DateTime -s UInt32 +d Date +k UInt64 +i32 Int32 +dt DateTime +s UInt32 CREATE TABLE test.replicated_alter2 ( d Date, k UInt64, i32 Int32, dt DateTime, s UInt32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) 2015-01-01 6 38 2014-07-15 13:26:50 100500 2015-01-01 7 39 2014-07-14 13:26:50 0 2015-01-01 8 40 2012-12-12 12:12:12 0 2015-01-01 9 41 1992-01-01 08:00:00 0 2015-01-01 10 42 0000-00-00 00:00:00 0 -d Date -k UInt64 -i32 Int32 -dt DateTime -s UInt32 -n.s Array(String) -n.d Array(Date) +d Date +k UInt64 +i32 Int32 +dt DateTime +s UInt32 +n.s Array(String) +n.d Array(Date) CREATE TABLE test.replicated_alter1 ( d Date, k UInt64, i32 Int32, dt DateTime, s UInt32, `n.s` Array(String), `n.d` Array(Date)) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) -d Date -k UInt64 -i32 Int32 -dt DateTime -s UInt32 -n.s Array(String) -n.d Array(Date) +d Date +k UInt64 +i32 Int32 +dt DateTime +s UInt32 +n.s Array(String) +n.d Array(Date) CREATE TABLE test.replicated_alter2 ( d Date, k UInt64, i32 Int32, dt DateTime, s UInt32, `n.s` Array(String), `n.d` Array(Date)) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) 2015-01-01 6 38 2014-07-15 13:26:50 100500 [] [] 2015-01-01 7 39 2014-07-14 13:26:50 0 [] [] 2015-01-01 8 40 2012-12-12 12:12:12 0 [] [] 2015-01-01 9 41 1992-01-01 08:00:00 0 [] [] 2015-01-01 10 42 0000-00-00 00:00:00 0 [] [] -d Date -k UInt64 -i32 Int32 -dt DateTime -s UInt32 +d Date +k UInt64 +i32 Int32 +dt DateTime +s UInt32 CREATE TABLE test.replicated_alter1 ( d Date, k UInt64, i32 Int32, dt DateTime, s UInt32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) -d Date -k UInt64 -i32 Int32 -dt DateTime -s UInt32 +d Date +k UInt64 +i32 Int32 +dt DateTime +s UInt32 CREATE TABLE test.replicated_alter2 ( d Date, k UInt64, i32 Int32, dt DateTime, s UInt32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) 2015-01-01 6 38 2014-07-15 13:26:50 100500 2015-01-01 7 39 2014-07-14 13:26:50 0 2015-01-01 8 40 2012-12-12 12:12:12 0 2015-01-01 9 41 1992-01-01 08:00:00 0 2015-01-01 10 42 0000-00-00 00:00:00 0 -d Date -k UInt64 -i32 Int32 -dt Date -s DateTime +d Date +k UInt64 +i32 Int32 +dt Date +s DateTime CREATE TABLE test.replicated_alter1 ( d Date, k UInt64, i32 Int32, dt Date, s DateTime) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) -d Date -k UInt64 -i32 Int32 -dt Date -s DateTime +d Date +k UInt64 +i32 Int32 +dt Date +s DateTime CREATE TABLE test.replicated_alter2 ( d Date, k UInt64, i32 Int32, dt Date, s DateTime) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) 
2015-01-01 6 38 2014-07-15 1970-01-02 06:55:00 2015-01-01 7 39 2014-07-14 0000-00-00 00:00:00 diff --git a/dbms/tests/queries/0_stateless/000732_base64_functions.reference b/dbms/tests/queries/0_stateless/000732_base64_functions.reference new file mode 100644 index 0000000000..b22ae4e7e2 --- /dev/null +++ b/dbms/tests/queries/0_stateless/000732_base64_functions.reference @@ -0,0 +1,16 @@ + +Zg== +Zm8= +Zm9v +Zm9vYg== +Zm9vYmE= +Zm9vYmFy + +f +fo +foo +foob +fooba +foobar +1 1 + diff --git a/dbms/tests/queries/0_stateless/000732_base64_functions.sql b/dbms/tests/queries/0_stateless/000732_base64_functions.sql new file mode 100644 index 0000000000..a2dd4c4b41 --- /dev/null +++ b/dbms/tests/queries/0_stateless/000732_base64_functions.sql @@ -0,0 +1,6 @@ +SET send_logs_level = 'none'; +SELECT base64Encode(val) FROM (select arrayJoin(['', 'f', 'fo', 'foo', 'foob', 'fooba', 'foobar']) val); +SELECT base64Decode(val) FROM (select arrayJoin(['', 'Zg==', 'Zm8=', 'Zm9v', 'Zm9vYg==', 'Zm9vYmE=', 'Zm9vYmFy']) val); +SELECT base64Decode(base64Encode('foo')) = 'foo', base64Encode(base64Decode('Zm9v')) == 'Zm9v'; +SELECT tryBase64Decode('Zm9vYmF=Zm9v'); +SELECT base64Decode('Zm9vYmF=Zm9v'); -- { serverError 117 } \ No newline at end of file diff --git a/dbms/tests/queries/0_stateless/00079_defaulted_columns.reference b/dbms/tests/queries/0_stateless/00079_defaulted_columns.reference index 0665588a53..135da6b368 100644 --- a/dbms/tests/queries/0_stateless/00079_defaulted_columns.reference +++ b/dbms/tests/queries/0_stateless/00079_defaulted_columns.reference @@ -1,42 +1,42 @@ -col1 UInt8 DEFAULT 0 -col1 UInt32 -col2 UInt64 DEFAULT col1 + 1 -col3 UInt64 MATERIALIZED col1 + 2 -col4 UInt64 ALIAS col1 + 3 +col1 UInt8 DEFAULT 0 +col1 UInt32 +col2 UInt64 DEFAULT col1 + 1 +col3 UInt64 MATERIALIZED col1 + 2 +col4 UInt64 ALIAS col1 + 3 10 11 12 13 99 -payload String -date Date MATERIALIZED today() -key UInt64 MATERIALIZED 0 * rand() +payload String +date Date MATERIALIZED today() +key UInt64 MATERIALIZED 0 * rand() hello clickhouse -payload String -date Date MATERIALIZED today() -key UInt64 MATERIALIZED 0 * rand() -payload_length UInt64 MATERIALIZED length(payload) +payload String +date Date MATERIALIZED today() +key UInt64 MATERIALIZED 0 * rand() +payload_length UInt64 MATERIALIZED length(payload) hello clickhouse 16 hello clickhouse 16 some string 11 hello clickhouse 16 some string 11 -payload String -payload_length UInt64 DEFAULT length(payload) -date Date MATERIALIZED today() -key UInt64 MATERIALIZED 0 * rand() +payload String +payload_length UInt64 DEFAULT length(payload) +date Date MATERIALIZED today() +key UInt64 MATERIALIZED 0 * rand() hello clickhouse 16 some string 11 -payload String -payload_length UInt16 DEFAULT length(payload) % 65535 -date Date MATERIALIZED today() -key UInt64 MATERIALIZED 0 * rand() +payload String +payload_length UInt16 DEFAULT length(payload) % 65535 +date Date MATERIALIZED today() +key UInt64 MATERIALIZED 0 * rand() hello clickhouse 16 some string 11 -payload String -payload_length UInt16 DEFAULT CAST(length(payload), \'UInt16\') -date Date MATERIALIZED today() -key UInt64 MATERIALIZED 0 * rand() -payload String -date Date MATERIALIZED today() -key UInt64 MATERIALIZED 0 * rand() +payload String +payload_length UInt16 DEFAULT CAST(length(payload), \'UInt16\') +date Date MATERIALIZED today() +key UInt64 MATERIALIZED 0 * rand() +payload String +date Date MATERIALIZED today() +key UInt64 MATERIALIZED 0 * rand() hello clickhouse some string diff --git 
a/dbms/tests/queries/0_stateless/00168_buffer_defaults.reference b/dbms/tests/queries/0_stateless/00168_buffer_defaults.reference index 3f70f0ee2c..aa378b78ba 100644 --- a/dbms/tests/queries/0_stateless/00168_buffer_defaults.reference +++ b/dbms/tests/queries/0_stateless/00168_buffer_defaults.reference @@ -1,9 +1,9 @@ -EventDate Date -UTCEventTime DateTime -MoscowEventDate Date DEFAULT toDate(UTCEventTime) -EventDate Date -UTCEventTime DateTime -MoscowEventDate Date DEFAULT toDate(UTCEventTime) +EventDate Date +UTCEventTime DateTime +MoscowEventDate Date DEFAULT toDate(UTCEventTime) +EventDate Date +UTCEventTime DateTime +MoscowEventDate Date DEFAULT toDate(UTCEventTime) 2015-06-09 2015-06-09 01:02:03 2015-06-09 2015-06-09 2015-06-09 01:02:03 2015-06-09 2015-06-09 2015-06-09 01:02:03 2015-06-09 diff --git a/dbms/tests/queries/0_stateless/00212_shard_aggregate_function_uniq.reference b/dbms/tests/queries/0_stateless/00212_shard_aggregate_function_uniq.reference index 8c94b8a5d5..7ca0f2fb7b 100644 --- a/dbms/tests/queries/0_stateless/00212_shard_aggregate_function_uniq.reference +++ b/dbms/tests/queries/0_stateless/00212_shard_aggregate_function_uniq.reference @@ -259,6 +259,162 @@ 31 53948 35 53931 36 53982 +1 1 +3 1 +6 1 +7 1 +9 1 +11 1 +14 1 +17 1 +19 1 +20 2 +26 1 +31 1 +35 1 +36 1 +0 162 +1 162 +3 162 +6 162 +7 163 +9 163 +10 81 +11 163 +13 162 +14 162 +17 162 +19 162 +20 162 +21 162 +22 162 +26 162 +31 162 +35 162 +36 162 +0 54708 +1 53721 +3 53226 +6 54532 +7 52275 +9 53417 +10 26931 +11 54428 +13 53409 +14 53188 +17 55120 +19 54123 +20 53293 +21 53928 +22 53827 +26 53920 +31 53763 +35 54635 +36 53155 +1 1 +3 1 +6 1 +7 1 +9 1 +11 1 +14 1 +17 1 +19 1 +20 2 +26 1 +31 1 +35 1 +36 1 +0 162 +1 162 +3 162 +6 162 +7 163 +9 163 +10 81 +11 163 +13 162 +14 162 +17 162 +19 162 +20 162 +21 162 +22 162 +26 162 +31 162 +35 162 +36 162 +0 53901 +1 54056 +3 53999 +6 54129 +7 54213 +9 53853 +10 26975 +11 54064 +13 53963 +14 53997 +17 54129 +19 53923 +20 53958 +21 54117 +22 54150 +26 54047 +31 53948 +35 53931 +36 53982 +1 1 +3 1 +6 1 +7 1 +9 1 +11 1 +14 1 +17 1 +19 1 +20 2 +26 1 +31 1 +35 1 +36 1 +0 162 +1 162 +3 162 +6 162 +7 163 +9 163 +10 81 +11 163 +13 162 +14 162 +17 162 +19 162 +20 162 +21 162 +22 162 +26 162 +31 162 +35 162 +36 162 +0 54054 +1 54054 +3 54054 +6 54054 +7 54053 +9 54053 +10 27027 +11 54055 +13 54054 +14 54054 +17 54054 +19 54053 +20 54053 +21 54054 +22 54053 +26 54053 +31 54054 +35 54054 +36 54054 0.125 1 0.5 1 0.05 1 @@ -311,6 +467,162 @@ 0.043 54150 0.037 54047 0.071 53963 +0.125 1 +0.5 1 +0.05 1 +0.143 1 +0.056 1 +0.048 2 +0.083 1 +0.25 1 +0.1 1 +0.028 1 +0.027 1 +0.031 1 +0.067 1 +0.037 1 +0.045 162 +0.125 163 +0.5 162 +0.05 162 +0.143 162 +0.091 81 +0.056 162 +0.048 162 +0.083 163 +0.25 162 +1 162 +0.1 163 +0.028 162 +0.027 162 +0.031 162 +0.067 162 +0.043 162 +0.037 162 +0.071 162 +0.045 53928 +0.125 52275 +0.5 53721 +0.05 54123 +0.143 54532 +0.091 26931 +0.056 55120 +0.048 53293 +0.083 54428 +0.25 53226 +1 54708 +0.1 53417 +0.028 54635 +0.027 53155 +0.031 53763 +0.067 53188 +0.043 53827 +0.037 53920 +0.071 53409 +0.125 1 +0.5 1 +0.05 1 +0.143 1 +0.056 1 +0.048 2 +0.083 1 +0.25 1 +0.1 1 +0.028 1 +0.027 1 +0.031 1 +0.067 1 +0.037 1 +0.045 162 +0.125 163 +0.5 162 +0.05 162 +0.143 162 +0.091 81 +0.056 162 +0.048 162 +0.083 163 +0.25 162 +1 162 +0.1 163 +0.028 162 +0.027 162 +0.031 162 +0.067 162 +0.043 162 +0.037 162 +0.071 162 +0.045 54117 +0.125 54213 +0.5 54056 +0.05 53923 +0.143 54129 +0.091 26975 +0.056 54129 +0.048 53958 +0.083 54064 +0.25 53999 +1 53901 +0.1 53853 +0.028 
53931 +0.027 53982 +0.031 53948 +0.067 53997 +0.043 54150 +0.037 54047 +0.071 53963 +0.125 1 +0.5 1 +0.05 1 +0.143 1 +0.056 1 +0.048 2 +0.083 1 +0.25 1 +0.1 1 +0.028 1 +0.027 1 +0.031 1 +0.067 1 +0.037 1 +0.045 162 +0.125 163 +0.5 162 +0.05 162 +0.143 162 +0.091 81 +0.056 162 +0.048 162 +0.083 163 +0.25 162 +1 162 +0.1 163 +0.028 162 +0.027 162 +0.031 162 +0.067 162 +0.043 162 +0.037 162 +0.071 162 +0.045 54054 +0.125 54053 +0.5 54054 +0.05 54053 +0.143 54054 +0.091 27027 +0.056 54054 +0.048 54053 +0.083 54055 +0.25 54054 +1 54054 +0.1 54053 +0.028 54054 +0.027 54054 +0.031 54054 +0.067 54054 +0.043 54053 +0.037 54053 +0.071 54054 0.5 1 0.05 1 0.25 1 @@ -363,6 +675,162 @@ 0.037 54047 0.1 53853 1 53901 +0.5 1 +0.05 1 +0.25 1 +0.048 2 +0.083 1 +0.125 1 +0.031 1 +0.143 1 +0.028 1 +0.067 1 +0.027 1 +0.056 1 +0.037 1 +0.1 1 +0.5 162 +0.05 162 +0.25 162 +0.048 162 +0.091 81 +0.043 162 +0.071 162 +0.083 163 +0.125 163 +0.031 162 +0.143 162 +0.028 162 +0.067 162 +0.045 162 +0.027 162 +0.056 162 +0.037 162 +0.1 163 +1 162 +0.5 53721 +0.05 54123 +0.25 53226 +0.048 53293 +0.091 26931 +0.043 53827 +0.071 53409 +0.083 54428 +0.125 52275 +0.031 53763 +0.143 54532 +0.028 54635 +0.067 53188 +0.045 53928 +0.027 53155 +0.056 55120 +0.037 53920 +0.1 53417 +1 54708 +0.5 1 +0.05 1 +0.25 1 +0.048 2 +0.083 1 +0.125 1 +0.031 1 +0.143 1 +0.028 1 +0.067 1 +0.027 1 +0.056 1 +0.037 1 +0.1 1 +0.5 162 +0.05 162 +0.25 162 +0.048 162 +0.091 81 +0.043 162 +0.071 162 +0.083 163 +0.125 163 +0.031 162 +0.143 162 +0.028 162 +0.067 162 +0.045 162 +0.027 162 +0.056 162 +0.037 162 +0.1 163 +1 162 +0.5 54056 +0.05 53923 +0.25 53999 +0.048 53958 +0.091 26975 +0.043 54150 +0.071 53963 +0.083 54064 +0.125 54213 +0.031 53948 +0.143 54129 +0.028 53931 +0.067 53997 +0.045 54117 +0.027 53982 +0.056 54129 +0.037 54047 +0.1 53853 +1 53901 +0.5 1 +0.05 1 +0.25 1 +0.048 2 +0.083 1 +0.125 1 +0.031 1 +0.143 1 +0.028 1 +0.067 1 +0.027 1 +0.056 1 +0.037 1 +0.1 1 +0.5 162 +0.05 162 +0.25 162 +0.048 162 +0.091 81 +0.043 162 +0.071 162 +0.083 163 +0.125 163 +0.031 162 +0.143 162 +0.028 162 +0.067 162 +0.045 162 +0.027 162 +0.056 162 +0.037 162 +0.1 163 +1 162 +0.5 54054 +0.05 54053 +0.25 54054 +0.048 54053 +0.091 27027 +0.043 54053 +0.071 54054 +0.083 54055 +0.125 54053 +0.031 54054 +0.143 54054 +0.028 54054 +0.067 54054 +0.045 54054 +0.027 54054 +0.056 54054 +0.037 54053 +0.1 54053 +1 54054 1 1 3 1 6 1 @@ -415,4 +883,163 @@ 31 54074 35 54153 36 53999 +1 1 +3 1 +6 1 +7 1 +9 1 +11 1 +14 1 +17 1 +19 1 +20 2 +26 1 +31 1 +35 1 +36 1 +0 162 +1 162 +3 162 +6 162 +7 163 +9 163 +10 81 +11 163 +13 162 +14 162 +17 162 +19 162 +20 162 +21 162 +22 162 +26 162 +31 162 +35 162 +36 162 +0 52613 +1 54468 +3 53824 +6 54441 +7 54543 +9 51908 +10 26964 +11 54013 +13 53178 +14 54113 +17 54662 +19 54697 +20 53279 +21 55301 +22 53693 +26 53873 +31 55200 +35 54808 +36 53051 +1 1 +3 1 +6 1 +7 1 +9 1 +11 1 +14 1 +17 1 +19 1 +20 2 +26 1 +31 1 +35 1 +36 1 +0 162 +1 162 +3 162 +6 162 +7 163 +9 163 +10 81 +11 163 +13 162 +14 162 +17 162 +19 162 +20 162 +21 162 +22 162 +26 162 +31 162 +35 162 +36 162 +0 54195 +1 54086 +3 54127 +6 54173 +7 53969 +9 54257 +10 26985 +11 53880 +13 54105 +14 54043 +17 54176 +19 53913 +20 54088 +21 53991 +22 54112 +26 54136 +31 54074 +35 54153 +36 53999 +1 1 +3 1 +6 1 +7 1 +9 1 +11 1 +14 1 +17 1 +19 1 +20 2 +26 1 +31 1 +35 1 +36 1 +0 162 +1 162 +3 162 +6 162 +7 163 +9 163 +10 81 +11 163 +13 162 +14 162 +17 162 +19 162 +20 162 +21 162 +22 162 +26 162 +31 162 +35 162 +36 162 +0 54054 +1 54054 +3 54054 +6 54054 +7 54054 +9 54054 +10 27027 +11 54055 +13 
54054 +14 54054 +17 54054 +19 54054 +20 54054 +21 54054 +22 54054 +26 54054 +31 54054 +35 54054 +36 54054 +1 +1 +1 1 diff --git a/dbms/tests/queries/0_stateless/00212_shard_aggregate_function_uniq.sql b/dbms/tests/queries/0_stateless/00212_shard_aggregate_function_uniq.sql index 53b5ec0001..ae54831b1a 100644 --- a/dbms/tests/queries/0_stateless/00212_shard_aggregate_function_uniq.sql +++ b/dbms/tests/queries/0_stateless/00212_shard_aggregate_function_uniq.sql @@ -24,16 +24,70 @@ SELECT Y, uniqCombined(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y SELECT Y, uniqCombined(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; SELECT Y, uniqCombined(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT Y, uniqCombined(12)(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqCombined(12)(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqCombined(12)(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; + +SELECT Y, uniqCombined(17)(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqCombined(17)(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqCombined(17)(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; + +SELECT Y, uniqCombined(20)(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqCombined(20)(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqCombined(20)(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; + SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT Y, uniqCombined(12)(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqCombined(12)(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqCombined(12)(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; + +SELECT Y, uniqCombined(17)(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqCombined(17)(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqCombined(17)(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; + +SELECT Y, uniqCombined(20)(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqCombined(20)(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 
11) % 37), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqCombined(20)(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; + SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT Y, uniqCombined(12)(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqCombined(12)(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqCombined(12)(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; + +SELECT Y, uniqCombined(17)(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqCombined(17)(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqCombined(17)(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; + +SELECT Y, uniqCombined(20)(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqCombined(20)(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqCombined(20)(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; + SELECT Y, uniqCombined(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; SELECT Y, uniqCombined(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; SELECT Y, uniqCombined(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT Y, uniqCombined(12)(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqCombined(12)(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqCombined(12)(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; + +SELECT Y, uniqCombined(17)(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqCombined(17)(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqCombined(17)(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM 
system.numbers LIMIT 1000000) GROUP BY Y; + +SELECT Y, uniqCombined(20)(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqCombined(20)(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqCombined(20)(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; + SELECT uniqCombined(dummy) FROM remote('127.0.0.{2,3}', system.one); + +SELECT uniqCombined(12)(dummy) FROM remote('127.0.0.{2,3}', system.one); + +SELECT uniqCombined(17)(dummy) FROM remote('127.0.0.{2,3}', system.one); + +SELECT uniqCombined(20)(dummy) FROM remote('127.0.0.{2,3}', system.one); diff --git a/dbms/tests/queries/0_stateless/00264_uniq_many_args.reference b/dbms/tests/queries/0_stateless/00264_uniq_many_args.reference index 758b4d6b8e..12aeed56e8 100644 --- a/dbms/tests/queries/0_stateless/00264_uniq_many_args.reference +++ b/dbms/tests/queries/0_stateless/00264_uniq_many_args.reference @@ -1,11 +1,11 @@ -10 10 100 100 1000 1000 10 10 100 100 1000 1000 10 10 101 101 1006 1006 10 10 100 100 1000 1000 6 6 6 6 6 6 -17 10 10 100 100 610 610 10 10 100 100 610 610 10 10 101 101 616 616 10 10 100 100 610 610 6 6 6 6 6 6 766 -52 10 10 100 100 608 608 10 10 100 100 608 608 10 10 101 101 611 611 10 10 100 100 608 608 6 6 6 6 6 6 766 -5 10 10 100 100 609 609 10 10 100 100 609 609 10 10 101 101 608 608 10 10 100 100 609 609 6 6 6 6 6 6 765 -9 10 10 100 100 608 608 10 10 100 100 608 608 10 10 101 101 618 618 10 10 100 100 608 608 6 6 6 6 6 6 765 -13 10 10 100 100 607 607 10 10 100 100 607 607 10 10 101 101 610 610 10 10 100 100 607 607 6 6 6 6 6 6 765 -46 10 10 100 100 607 607 10 10 100 100 607 607 10 10 101 101 611 611 10 10 100 100 607 607 6 6 6 6 6 6 765 -48 10 10 100 100 609 609 10 10 100 100 609 609 10 10 101 101 617 617 10 10 100 100 609 609 6 6 6 6 6 6 765 -50 10 10 100 100 608 608 10 10 100 100 608 608 10 10 101 101 614 614 10 10 100 100 608 608 6 6 6 6 6 6 765 -54 10 10 100 100 609 609 10 10 100 100 609 609 10 10 101 101 615 615 10 10 100 100 609 609 6 6 6 6 6 6 765 -56 10 10 100 100 608 608 10 10 100 100 608 608 10 10 101 101 614 614 10 10 100 100 608 608 6 6 6 6 6 6 765 +10 10 100 100 1000 1000 10 10 100 100 1000 1000 10 10 100 100 1000 1000 10 10 101 101 1006 1006 10 10 100 100 1000 1000 6 6 6 6 6 6 +17 10 10 100 100 610 610 10 10 100 100 610 610 10 10 100 100 610 610 10 10 101 101 616 616 10 10 100 100 610 610 6 6 6 6 6 6 766 +52 10 10 100 100 608 608 10 10 100 100 608 608 10 10 100 100 608 608 10 10 101 101 611 611 10 10 100 100 608 608 6 6 6 6 6 6 766 +5 10 10 100 100 609 609 10 10 100 100 609 609 10 10 100 100 609 609 10 10 101 101 608 608 10 10 100 100 609 609 6 6 6 6 6 6 765 +9 10 10 100 100 608 608 10 10 100 100 608 608 10 10 100 100 608 608 10 10 101 101 618 618 10 10 100 100 608 608 6 6 6 6 6 6 765 +13 10 10 100 100 607 607 10 10 100 100 607 607 10 10 100 100 607 607 10 10 101 101 610 610 10 10 100 100 607 607 6 6 6 6 6 6 765 +46 10 10 100 100 607 607 10 10 100 100 607 607 10 10 100 100 607 607 10 10 101 101 611 611 10 10 100 100 607 607 6 6 6 6 6 6 765 +48 10 10 100 100 609 609 10 10 100 100 609 609 10 10 100 100 609 609 10 10 101 101 617 617 10 10 100 100 609 609 6 6 6 6 6 6 765 +50 10 10 100 100 608 608 10 10 100 100 608 608 10 10 100 100 608 608 10 10 101 101 614 614 10 10 100 100 608 608 6 6 6 6 6 6 765 +54 10 10 100 100 609 609 10 
10 100 100 609 609 10 10 100 100 609 609 10 10 101 101 615 615 10 10 100 100 609 609 6 6 6 6 6 6 765 +56 10 10 100 100 608 608 10 10 100 100 608 608 10 10 100 100 608 608 10 10 101 101 614 614 10 10 100 100 608 608 6 6 6 6 6 6 765 diff --git a/dbms/tests/queries/0_stateless/00264_uniq_many_args.sql b/dbms/tests/queries/0_stateless/00264_uniq_many_args.sql index 2b24e68910..847d753a36 100644 --- a/dbms/tests/queries/0_stateless/00264_uniq_many_args.sql +++ b/dbms/tests/queries/0_stateless/00264_uniq_many_args.sql @@ -1,12 +1,13 @@ -SELECT +SELECT uniq(x), uniq((x)), uniq(x, y), uniq((x, y)), uniq(x, y, z), uniq((x, y, z)), uniqCombined(x), uniqCombined((x)), uniqCombined(x, y), uniqCombined((x, y)), uniqCombined(x, y, z), uniqCombined((x, y, z)), + uniqCombined(17)(x), uniqCombined(17)((x)), uniqCombined(17)(x, y), uniqCombined(17)((x, y)), uniqCombined(17)(x, y, z), uniqCombined(17)((x, y, z)), uniqHLL12(x), uniqHLL12((x)), uniqHLL12(x, y), uniqHLL12((x, y)), uniqHLL12(x, y, z), uniqHLL12((x, y, z)), uniqExact(x), uniqExact((x)), uniqExact(x, y), uniqExact((x, y)), uniqExact(x, y, z), uniqExact((x, y, z)), uniqUpTo(5)(x), uniqUpTo(5)((x)), uniqUpTo(5)(x, y), uniqUpTo(5)((x, y)), uniqUpTo(5)(x, y, z), uniqUpTo(5)((x, y, z)) FROM ( - SELECT + SELECT number % 10 AS x, intDiv(number, 10) % 10 AS y, toString(intDiv(number, 100) % 10) AS z @@ -17,13 +18,14 @@ FROM SELECT k, uniq(x), uniq((x)), uniq(x, y), uniq((x, y)), uniq(x, y, z), uniq((x, y, z)), uniqCombined(x), uniqCombined((x)), uniqCombined(x, y), uniqCombined((x, y)), uniqCombined(x, y, z), uniqCombined((x, y, z)), + uniqCombined(17)(x), uniqCombined(17)((x)), uniqCombined(17)(x, y), uniqCombined(17)((x, y)), uniqCombined(17)(x, y, z), uniqCombined(17)((x, y, z)), uniqHLL12(x), uniqHLL12((x)), uniqHLL12(x, y), uniqHLL12((x, y)), uniqHLL12(x, y, z), uniqHLL12((x, y, z)), uniqExact(x), uniqExact((x)), uniqExact(x, y), uniqExact((x, y)), uniqExact(x, y, z), uniqExact((x, y, z)), uniqUpTo(5)(x), uniqUpTo(5)((x)), uniqUpTo(5)(x, y), uniqUpTo(5)((x, y)), uniqUpTo(5)(x, y, z), uniqUpTo(5)((x, y, z)), count() AS c FROM ( - SELECT + SELECT (number + 0x8ffcbd8257219a26) * 0x66bb3430c06d2353 % 131 AS k, number % 10 AS x, intDiv(number, 10) % 10 AS y, diff --git a/dbms/tests/queries/0_stateless/00294_shard_enums.reference b/dbms/tests/queries/0_stateless/00294_shard_enums.reference index 0f354d5723..28dc778125 100644 --- a/dbms/tests/queries/0_stateless/00294_shard_enums.reference +++ b/dbms/tests/queries/0_stateless/00294_shard_enums.reference @@ -1,28 +1,28 @@ -d Date DEFAULT CAST(\'2015-12-29\', \'Date\') -k UInt8 DEFAULT 0 -e Enum8(\'hello\' = 1, \'world\' = 2) -sign Enum8(\'minus\' = -1, \'plus\' = 1) -letter Enum16(\'*\' = -256, \'a\' = 0, \'b\' = 1, \'c\' = 2) +d Date DEFAULT CAST(\'2015-12-29\', \'Date\') +k UInt8 DEFAULT 0 +e Enum8(\'hello\' = 1, \'world\' = 2) +sign Enum8(\'minus\' = -1, \'plus\' = 1) +letter Enum16(\'*\' = -256, \'a\' = 0, \'b\' = 1, \'c\' = 2) 2015-12-29 0 hello minus * -d Date DEFAULT CAST(\'2015-12-29\', \'Date\') -k UInt8 DEFAULT 0 -e Enum8(\'hello\' = 1, \'world\' = 2, \'!\' = 3) -sign Enum8(\'minus\' = -1, \'plus\' = 1) -letter Enum16(\'*\' = -256, \'a\' = 0, \'b\' = 1, \'c\' = 2) +d Date DEFAULT CAST(\'2015-12-29\', \'Date\') +k UInt8 DEFAULT 0 +e Enum8(\'hello\' = 1, \'world\' = 2, \'!\' = 3) +sign Enum8(\'minus\' = -1, \'plus\' = 1) +letter Enum16(\'*\' = -256, \'a\' = 0, \'b\' = 1, \'c\' = 2) 2015-12-29 0 hello minus * 2015-12-29 0 ! 
plus b -d Date DEFAULT CAST(\'2015-12-29\', \'Date\') -k UInt8 DEFAULT 0 -e Enum16(\'hello\' = 1, \'world\' = 2, \'!\' = 3) -sign Enum16(\'minus\' = -1, \'plus\' = 1) -letter Enum16(\'no letter\' = -256, \'a\' = 0, \'b\' = 1, \'c\' = 2) +d Date DEFAULT CAST(\'2015-12-29\', \'Date\') +k UInt8 DEFAULT 0 +e Enum16(\'hello\' = 1, \'world\' = 2, \'!\' = 3) +sign Enum16(\'minus\' = -1, \'plus\' = 1) +letter Enum16(\'no letter\' = -256, \'a\' = 0, \'b\' = 1, \'c\' = 2) 2015-12-29 0 hello minus no letter 2015-12-29 0 ! plus b -d Date DEFAULT CAST(\'2015-12-29\', \'Date\') -k UInt8 DEFAULT 0 -e Enum8(\'hello\' = 1, \'world\' = 2, \'!\' = 3) -sign Enum8(\'minus\' = -1, \'plus\' = 1) -letter Enum16(\'no letter\' = -256, \'a\' = 0, \'b\' = 1, \'c\' = 2) +d Date DEFAULT CAST(\'2015-12-29\', \'Date\') +k UInt8 DEFAULT 0 +e Enum8(\'hello\' = 1, \'world\' = 2, \'!\' = 3) +sign Enum8(\'minus\' = -1, \'plus\' = 1) +letter Enum16(\'no letter\' = -256, \'a\' = 0, \'b\' = 1, \'c\' = 2) 2015-12-29 0 hello minus no letter 2015-12-29 0 ! plus b 2015-12-29 0 world minus c diff --git a/dbms/tests/queries/0_stateless/00415_into_outfile.reference b/dbms/tests/queries/0_stateless/00415_into_outfile.reference index 0aa3993f4c..1fc1ceac89 100644 --- a/dbms/tests/queries/0_stateless/00415_into_outfile.reference +++ b/dbms/tests/queries/0_stateless/00415_into_outfile.reference @@ -6,7 +6,7 @@ performing test: union_all performing test: bad_union_all query failed performing test: describe_table -dummy UInt8 +dummy UInt8 performing test: clickhouse-local 2 3 performing test: http diff --git a/dbms/tests/queries/0_stateless/00428_partition.reference b/dbms/tests/queries/0_stateless/00428_partition.reference index 788600df41..241048131f 100644 --- a/dbms/tests/queries/0_stateless/00428_partition.reference +++ b/dbms/tests/queries/0_stateless/00428_partition.reference @@ -1,6 +1,44 @@ 5 5 +082814b5aa5109160d5c0c5aff10d4df shadow/1/data/test/partition_428/19700102_19700102_2_2_0/k.bin +082814b5aa5109160d5c0c5aff10d4df shadow/1/data/test/partition_428/19700201_19700201_1_1_0/v1.bin +38e62ff37e1e5064e9a3f605dfe09d13 shadow/1/data/test/partition_428/19700102_19700102_2_2_0/v1.bin +4ae71336e44bf9bf79d2752e234818a5 shadow/1/data/test/partition_428/19700102_19700102_2_2_0/k.mrk +4ae71336e44bf9bf79d2752e234818a5 shadow/1/data/test/partition_428/19700102_19700102_2_2_0/p.mrk +4ae71336e44bf9bf79d2752e234818a5 shadow/1/data/test/partition_428/19700102_19700102_2_2_0/v1.mrk +4ae71336e44bf9bf79d2752e234818a5 shadow/1/data/test/partition_428/19700201_19700201_1_1_0/k.mrk +4ae71336e44bf9bf79d2752e234818a5 shadow/1/data/test/partition_428/19700201_19700201_1_1_0/p.mrk +4ae71336e44bf9bf79d2752e234818a5 shadow/1/data/test/partition_428/19700201_19700201_1_1_0/v1.mrk +55a54008ad1ba589aa210d2629c1df41 shadow/1/data/test/partition_428/19700201_19700201_1_1_0/primary.idx +77d5af402ada101574f4da114f242e02 shadow/1/data/test/partition_428/19700102_19700102_2_2_0/columns.txt +77d5af402ada101574f4da114f242e02 shadow/1/data/test/partition_428/19700201_19700201_1_1_0/columns.txt +88cdc31ded355e7572d68d8cde525d3a shadow/1/data/test/partition_428/19700201_19700201_1_1_0/p.bin +9e688c58a5487b8eaf69c9e1005ad0bf shadow/1/data/test/partition_428/19700102_19700102_2_2_0/primary.idx +b01e3d4df40467db3f1c2d029f59f6a2 shadow/1/data/test/partition_428/19700201_19700201_1_1_0/checksums.txt +b026324c6904b2a9cb4b88d6d61c81d1 shadow/increment.txt +cfcb770c3ecd0990dcceb1bde129e6c6 shadow/1/data/test/partition_428/19700102_19700102_2_2_0/p.bin 
+e2af3bef1fd129aea73a890ede1e7a30 shadow/1/data/test/partition_428/19700201_19700201_1_1_0/k.bin +e6654eba9e88b001280d3bdd21ccc417 shadow/1/data/test/partition_428/19700102_19700102_2_2_0/checksums.txt 5 5 +082814b5aa5109160d5c0c5aff10d4df shadow/1/data/test/partition_428/19700102_19700102_2_2_0/k.bin +082814b5aa5109160d5c0c5aff10d4df shadow/1/data/test/partition_428/19700201_19700201_1_1_0/v1.bin +38e62ff37e1e5064e9a3f605dfe09d13 shadow/1/data/test/partition_428/19700102_19700102_2_2_0/v1.bin +4ae71336e44bf9bf79d2752e234818a5 shadow/1/data/test/partition_428/19700102_19700102_2_2_0/k.mrk +4ae71336e44bf9bf79d2752e234818a5 shadow/1/data/test/partition_428/19700102_19700102_2_2_0/p.mrk +4ae71336e44bf9bf79d2752e234818a5 shadow/1/data/test/partition_428/19700102_19700102_2_2_0/v1.mrk +4ae71336e44bf9bf79d2752e234818a5 shadow/1/data/test/partition_428/19700201_19700201_1_1_0/k.mrk +4ae71336e44bf9bf79d2752e234818a5 shadow/1/data/test/partition_428/19700201_19700201_1_1_0/p.mrk +4ae71336e44bf9bf79d2752e234818a5 shadow/1/data/test/partition_428/19700201_19700201_1_1_0/v1.mrk +55a54008ad1ba589aa210d2629c1df41 shadow/1/data/test/partition_428/19700201_19700201_1_1_0/primary.idx +77d5af402ada101574f4da114f242e02 shadow/1/data/test/partition_428/19700102_19700102_2_2_0/columns.txt +77d5af402ada101574f4da114f242e02 shadow/1/data/test/partition_428/19700201_19700201_1_1_0/columns.txt +88cdc31ded355e7572d68d8cde525d3a shadow/1/data/test/partition_428/19700201_19700201_1_1_0/p.bin +9e688c58a5487b8eaf69c9e1005ad0bf shadow/1/data/test/partition_428/19700102_19700102_2_2_0/primary.idx +b01e3d4df40467db3f1c2d029f59f6a2 shadow/1/data/test/partition_428/19700201_19700201_1_1_0/checksums.txt +b026324c6904b2a9cb4b88d6d61c81d1 shadow/increment.txt +cfcb770c3ecd0990dcceb1bde129e6c6 shadow/1/data/test/partition_428/19700102_19700102_2_2_0/p.bin +e2af3bef1fd129aea73a890ede1e7a30 shadow/1/data/test/partition_428/19700201_19700201_1_1_0/k.bin +e6654eba9e88b001280d3bdd21ccc417 shadow/1/data/test/partition_428/19700102_19700102_2_2_0/checksums.txt 31,1,2 1,2,3 diff --git a/dbms/tests/queries/0_stateless/00428_partition.sh b/dbms/tests/queries/0_stateless/00428_partition.sh index ce6ad9e1cd..27cb94c1d4 100755 --- a/dbms/tests/queries/0_stateless/00428_partition.sh +++ b/dbms/tests/queries/0_stateless/00428_partition.sh @@ -23,6 +23,11 @@ for part in `$chl "SELECT name FROM system.parts WHERE database='test' AND table cat $ch_dir/data/test/partition_428/$part/columns.txt) | wc -l done +$chl "ALTER TABLE test.partition_428 FREEZE" + +# Do `cd` for consistent output for reference +cd $ch_dir && find shadow -type f -exec md5sum {} \; | sort + $chl "ALTER TABLE test.partition_428 DETACH PARTITION 197001" $chl "ALTER TABLE test.partition_428 ATTACH PARTITION 197001" @@ -33,6 +38,10 @@ for part in `$chl "SELECT name FROM system.parts WHERE database='test' AND table done $chl "ALTER TABLE test.partition_428 MODIFY COLUMN v1 Int8" + +# Check the backup hasn't changed +cd $ch_dir && find shadow -type f -exec md5sum {} \; | sort + $chl "OPTIMIZE TABLE test.partition_428" $chl "SELECT toUInt16(p), k, v1 FROM test.partition_428 ORDER BY k FORMAT CSV" diff --git a/dbms/tests/queries/0_stateless/00515_shard_desc_table_functions_and_subqueries.reference b/dbms/tests/queries/0_stateless/00515_shard_desc_table_functions_and_subqueries.reference index f24dcad7c8..5ccb1694c9 100644 --- a/dbms/tests/queries/0_stateless/00515_shard_desc_table_functions_and_subqueries.reference +++ 
b/dbms/tests/queries/0_stateless/00515_shard_desc_table_functions_and_subqueries.reference @@ -1,30 +1,30 @@ -date Date -val UInt64 -val2 UInt8 DEFAULT 42 -val3 UInt8 DEFAULT CAST(val2 + 1, \'UInt8\') -val4 UInt64 ALIAS val +date Date +val UInt64 +val2 UInt8 DEFAULT 42 +val3 UInt8 DEFAULT CAST(val2 + 1, \'UInt8\') +val4 UInt64 ALIAS val - -date Date -val UInt64 -val2 UInt8 DEFAULT 42 -val3 UInt8 DEFAULT CAST(val2 + 1, \'UInt8\') -val4 UInt64 ALIAS val +date Date +val UInt64 +val2 UInt8 DEFAULT 42 +val3 UInt8 DEFAULT CAST(val2 + 1, \'UInt8\') +val4 UInt64 ALIAS val - -date Date -val UInt64 -val2 UInt8 DEFAULT 42 -val3 UInt8 DEFAULT CAST(val2 + 1, \'UInt8\') -val4 UInt64 ALIAS val +date Date +val UInt64 +val2 UInt8 DEFAULT 42 +val3 UInt8 DEFAULT CAST(val2 + 1, \'UInt8\') +val4 UInt64 ALIAS val - -date Date -val UInt64 -val2 UInt8 DEFAULT 42 -val3 UInt8 DEFAULT CAST(val2 + 1, \'UInt8\') -val4 UInt64 ALIAS val +date Date +val UInt64 +val2 UInt8 DEFAULT 42 +val3 UInt8 DEFAULT CAST(val2 + 1, \'UInt8\') +val4 UInt64 ALIAS val - -1 UInt8 +1 UInt8 - -1 UInt8 +1 UInt8 - -number UInt64 +number UInt64 - diff --git a/dbms/tests/queries/0_stateless/00557_alter_null_storage_tables.reference b/dbms/tests/queries/0_stateless/00557_alter_null_storage_tables.reference index c9634180c5..35790c754d 100644 --- a/dbms/tests/queries/0_stateless/00557_alter_null_storage_tables.reference +++ b/dbms/tests/queries/0_stateless/00557_alter_null_storage_tables.reference @@ -1,3 +1,3 @@ -x UInt8 -x Int64 DEFAULT toInt64(y) -y String +x UInt8 +x Int64 DEFAULT toInt64(y) +y String diff --git a/dbms/tests/queries/0_stateless/00597_push_down_predicate.sql b/dbms/tests/queries/0_stateless/00597_push_down_predicate.sql index 7b7f12b8ff..2f1af7fa1f 100644 --- a/dbms/tests/queries/0_stateless/00597_push_down_predicate.sql +++ b/dbms/tests/queries/0_stateless/00597_push_down_predicate.sql @@ -1,8 +1,10 @@ SET send_logs_level = 'none'; +DROP TABLE IF EXISTS test.perf; DROP TABLE IF EXISTS test.test; DROP TABLE IF EXISTS test.test_view; +CREATE TABLE test.perf(site String, user_id UInt64, z Float64)ENGINE = Log; CREATE TABLE test.test(date Date, id Int8, name String, value Int64) ENGINE = MergeTree(date, (id, date), 8192); CREATE VIEW test.test_view AS SELECT * FROM test.test; @@ -15,6 +17,7 @@ SELECT '-------Not need optimize predicate, but it works.-------'; SELECT 1; SELECT 1 AS id WHERE id = 1; SELECT arrayJoin([1,2,3]) AS id WHERE id = 1; +SELECT * FROM (SELECT perf_1.z AS z_1 FROM test.perf AS perf_1); SELECT '-------Need push down-------'; SELECT * FROM system.one ANY LEFT JOIN (SELECT 0 AS dummy) USING dummy WHERE 1; @@ -22,6 +25,7 @@ SELECT toString(value) AS value FROM (SELECT 1 AS value) WHERE value = '1'; SELECT * FROM (SELECT 1 AS id UNION ALL SELECT 2) WHERE id = 1; SELECT * FROM (SELECT arrayJoin([1, 2, 3]) AS id) WHERE id = 1; SELECT id FROM (SELECT arrayJoin([1, 2, 3]) AS id) WHERE id = 1; +SELECT * FROM (SELECT perf_1.z AS z_1 FROM test.perf AS perf_1) WHERE z_1 = 1; SELECT * FROM (SELECT 1 AS id, (SELECT 1) as subquery) WHERE subquery = 1; SELECT * FROM (SELECT toUInt64(b) AS a, sum(id) AS b FROM test.test) WHERE a = 3; @@ -72,5 +76,6 @@ SELECT * FROM (SELECT toUInt64(table_alias.b) AS a, sum(id) AS b FROM test.test SELECT '-------Compatibility test-------'; SELECT * FROM (SELECT toInt8(1) AS id, toDate('2000-01-01') AS date FROM system.numbers LIMIT 1) ANY LEFT JOIN (SELECT * FROM test.test) AS b USING date, id WHERE b.date = toDate('2000-01-01'); +DROP TABLE IF EXISTS test.perf; DROP TABLE IF EXISTS test.test; 
DROP TABLE IF EXISTS test.test_view; diff --git a/dbms/tests/queries/0_stateless/00642_cast.reference b/dbms/tests/queries/0_stateless/00642_cast.reference index 296f1351c1..56f79769cc 100644 --- a/dbms/tests/queries/0_stateless/00642_cast.reference +++ b/dbms/tests/queries/0_stateless/00642_cast.reference @@ -8,6 +8,6 @@ hello hello 1970-01-01 00:00:01 CREATE TABLE test.cast ( x UInt8, e Enum8('hello' = 1, 'world' = 2) DEFAULT CAST(x, 'Enum8(\'hello\' = 1, \'world\' = 2)')) ENGINE = MergeTree ORDER BY e SETTINGS index_granularity = 8192 -x UInt8 -e Enum8(\'hello\' = 1, \'world\' = 2) DEFAULT CAST(x, \'Enum8(\\\'hello\\\' = 1, \\\'world\\\' = 2)\') +x UInt8 +e Enum8(\'hello\' = 1, \'world\' = 2) DEFAULT CAST(x, \'Enum8(\\\'hello\\\' = 1, \\\'world\\\' = 2)\') 1 hello diff --git a/dbms/tests/queries/0_stateless/00643_cast_zookeeper.reference b/dbms/tests/queries/0_stateless/00643_cast_zookeeper.reference index 401a354382..f3b3ba04d1 100644 --- a/dbms/tests/queries/0_stateless/00643_cast_zookeeper.reference +++ b/dbms/tests/queries/0_stateless/00643_cast_zookeeper.reference @@ -1,5 +1,5 @@ CREATE TABLE test.cast1 ( x UInt8, e Enum8('hello' = 1, 'world' = 2) DEFAULT CAST(x, 'Enum8(\'hello\' = 1, \'world\' = 2)')) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test_cast', 'r1') ORDER BY e SETTINGS index_granularity = 8192 -x UInt8 -e Enum8(\'hello\' = 1, \'world\' = 2) DEFAULT CAST(x, \'Enum8(\\\'hello\\\' = 1, \\\'world\\\' = 2)\') +x UInt8 +e Enum8(\'hello\' = 1, \'world\' = 2) DEFAULT CAST(x, \'Enum8(\\\'hello\\\' = 1, \\\'world\\\' = 2)\') 1 hello 1 hello diff --git a/dbms/tests/queries/0_stateless/00651_default_database_on_client_reconnect.reference b/dbms/tests/queries/0_stateless/00651_default_database_on_client_reconnect.reference index 4482012335..8ed1dd9ba8 100644 --- a/dbms/tests/queries/0_stateless/00651_default_database_on_client_reconnect.reference +++ b/dbms/tests/queries/0_stateless/00651_default_database_on_client_reconnect.reference @@ -1 +1 @@ -val UInt64 +val UInt64 diff --git a/dbms/tests/queries/0_stateless/00693_max_block_size_system_tables_columns.reference b/dbms/tests/queries/0_stateless/00693_max_block_size_system_tables_columns.reference index ab014c64e7..1caad944cb 100644 --- a/dbms/tests/queries/0_stateless/00693_max_block_size_system_tables_columns.reference +++ b/dbms/tests/queries/0_stateless/00693_max_block_size_system_tables_columns.reference @@ -2,7 +2,7 @@ 1 1 1 - t Memory 1 0000-00-00 00:00:00 [] [] Memory + t Memory 1 0000-00-00 00:00:00 [] [] Memory 1 1 1 diff --git a/dbms/tests/queries/0_stateless/00700_decimal_aggregates.reference b/dbms/tests/queries/0_stateless/00700_decimal_aggregates.reference index 3b1c6f9099..1a7c7fbdbb 100644 --- a/dbms/tests/queries/0_stateless/00700_decimal_aggregates.reference +++ b/dbms/tests/queries/0_stateless/00700_decimal_aggregates.reference @@ -8,7 +8,7 @@ 0.0000 0.00000000 0.00000000 25.5000 8.49999999 5.10000000 -25.5000 -8.49999999 -5.10000000 -(101,101,101) (101,101,101) (101,101,101) (102,100,101) +(101,101,101) (101,101,101) (101,101,101) (101,101,101) (102,100,101) 5 5 5 10 10 10 -50.0000 -50.0000 -16.66666666 -16.66666666 -10.00000000 -10.00000000 diff --git a/dbms/tests/queries/0_stateless/00700_decimal_aggregates.sql b/dbms/tests/queries/0_stateless/00700_decimal_aggregates.sql index e10f665ad0..951e1384e9 100644 --- a/dbms/tests/queries/0_stateless/00700_decimal_aggregates.sql +++ b/dbms/tests/queries/0_stateless/00700_decimal_aggregates.sql @@ -29,6 +29,7 @@ SELECT avg(a), avg(b), avg(c) FROM test.decimal 
WHERE a < 0; SELECT (uniq(a), uniq(b), uniq(c)), (uniqCombined(a), uniqCombined(b), uniqCombined(c)), + (uniqCombined(17)(a), uniqCombined(17)(b), uniqCombined(17)(c)), (uniqExact(a), uniqExact(b), uniqExact(c)), (uniqHLL12(a), uniqHLL12(b), uniqHLL12(c)) FROM (SELECT * FROM test.decimal ORDER BY a); diff --git a/dbms/tests/queries/0_stateless/00700_decimal_defaults.reference b/dbms/tests/queries/0_stateless/00700_decimal_defaults.reference index 30500d7c53..635f40a11f 100644 --- a/dbms/tests/queries/0_stateless/00700_decimal_defaults.reference +++ b/dbms/tests/queries/0_stateless/00700_decimal_defaults.reference @@ -1,9 +1,9 @@ -a Decimal(9, 4) DEFAULT CAST(0, \'Decimal(9, 4)\') -b Decimal(18, 4) DEFAULT CAST(a / 2, \'Decimal(18, 4)\') -c Decimal(38, 4) DEFAULT CAST(b / 3, \'Decimal(38, 4)\') -d Decimal(9, 4) MATERIALIZED a + toDecimal32(\'0.2\', 1) -e Decimal(18, 4) ALIAS b * 2 -f Decimal(38, 4) ALIAS c * 6 +a Decimal(9, 4) DEFAULT CAST(0, \'Decimal(9, 4)\') +b Decimal(18, 4) DEFAULT CAST(a / 2, \'Decimal(18, 4)\') +c Decimal(38, 4) DEFAULT CAST(b / 3, \'Decimal(38, 4)\') +d Decimal(9, 4) MATERIALIZED a + toDecimal32(\'0.2\', 1) +e Decimal(18, 4) ALIAS b * 2 +f Decimal(38, 4) ALIAS c * 6 0.0000 0.0000 0.0000 1.0000 0.5000 0.1666 2.0000 1.0000 0.3333 diff --git a/dbms/tests/queries/0_stateless/00717_low_cardinaliry_distributed_group_by.reference b/dbms/tests/queries/0_stateless/00717_low_cardinaliry_distributed_group_by.reference new file mode 100644 index 0000000000..97833514d8 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00717_low_cardinaliry_distributed_group_by.reference @@ -0,0 +1,10 @@ +0 2 +1 2 +2 2 +3 2 +4 2 +5 2 +6 2 +7 2 +8 2 +9 2 diff --git a/dbms/tests/queries/0_stateless/00717_low_cardinaliry_distributed_group_by.sql b/dbms/tests/queries/0_stateless/00717_low_cardinaliry_distributed_group_by.sql new file mode 100644 index 0000000000..db3ea53490 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00717_low_cardinaliry_distributed_group_by.sql @@ -0,0 +1,12 @@ +SET allow_experimental_low_cardinality_type = 1; +DROP TABLE IF EXISTS test.test_low_null_float; +DROP TABLE IF EXISTS test.dist; + +CREATE TABLE test.test_low_null_float (a LowCardinality(Nullable(Float64))) ENGINE = Memory; +CREATE TABLE test.dist (a LowCardinality(Nullable(Float64))) ENGINE = Distributed('test_cluster_two_shards_localhost', 'test', 'test_low_null_float', rand()); + +INSERT INTO test.dist (a) SELECT number FROM system.numbers LIMIT 1000000; +SELECT a, count() FROM test.dist GROUP BY a ORDER BY a ASC, count() ASC LIMIT 10; + +DROP TABLE IF EXISTS test.test_low_null_float; +DROP TABLE IF EXISTS test.dist; diff --git a/dbms/tests/queries/0_stateless/00725_comment_columns.reference b/dbms/tests/queries/0_stateless/00725_comment_columns.reference new file mode 100644 index 0000000000..ca4edfb312 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00725_comment_columns.reference @@ -0,0 +1,38 @@ +CREATE TABLE test.check_query_comment_column ( first_column UInt8 DEFAULT 1 COMMENT \'comment 1\', fourth_column UInt8 COMMENT \'comment 4\', fifth_column UInt8, second_column UInt8 MATERIALIZED first_column COMMENT \'comment 2\', third_column UInt8 ALIAS second_column COMMENT \'comment 3\') ENGINE = TinyLog +first_column UInt8 DEFAULT 1 comment 1 +fourth_column UInt8 comment 4 +fifth_column UInt8 +second_column UInt8 MATERIALIZED first_column comment 2 +third_column UInt8 ALIAS second_column comment 3 +┌─table──────────────────────┬─name──────────┬─comment───┐ +│ check_query_comment_column │ first_column │ 
comment 1 │ +│ check_query_comment_column │ fourth_column │ comment 4 │ +│ check_query_comment_column │ fifth_column │ │ +│ check_query_comment_column │ second_column │ comment 2 │ +│ check_query_comment_column │ third_column │ comment 3 │ +└────────────────────────────┴───────────────┴───────────┘ +CREATE TABLE test.check_query_comment_column ( first_column UInt8 DEFAULT 1 COMMENT \'comment 1_1\', fourth_column UInt8 COMMENT \'comment 4_1\', fifth_column UInt8 COMMENT \'comment 5_1\', second_column UInt8 MATERIALIZED first_column COMMENT \'comment 2_1\', third_column UInt8 ALIAS second_column COMMENT \'comment 3_1\') ENGINE = TinyLog +┌─table──────────────────────┬─name──────────┬─comment─────┐ +│ check_query_comment_column │ first_column │ comment 1_2 │ +│ check_query_comment_column │ fourth_column │ comment 4_2 │ +│ check_query_comment_column │ fifth_column │ comment 5_2 │ +│ check_query_comment_column │ second_column │ comment 2_2 │ +│ check_query_comment_column │ third_column │ comment 3_2 │ +└────────────────────────────┴───────────────┴─────────────┘ +CREATE TABLE test.check_query_comment_column ( first_column UInt8 DEFAULT 1 COMMENT \'comment 1_2\', fourth_column UInt8 COMMENT \'comment 4_2\', fifth_column UInt8 COMMENT \'comment 5_2\', second_column UInt8 MATERIALIZED first_column COMMENT \'comment 2_2\', third_column UInt8 ALIAS second_column COMMENT \'comment 3_2\') ENGINE = TinyLog +CREATE TABLE test.check_query_comment_column ( first_column UInt8 COMMENT \'comment 1\', second_column UInt8 COMMENT \'comment 2\', third_column UInt8 COMMENT \'comment 3\') ENGINE = MergeTree() PARTITION BY second_column ORDER BY first_column SAMPLE BY first_column SETTINGS index_granularity = 8192 +first_column UInt8 comment 1 +second_column UInt8 comment 2 +third_column UInt8 comment 3 +┌─table──────────────────────┬─name──────────┬─comment───┐ +│ check_query_comment_column │ first_column │ comment 1 │ +│ check_query_comment_column │ second_column │ comment 2 │ +│ check_query_comment_column │ third_column │ comment 3 │ +└────────────────────────────┴───────────────┴───────────┘ +CREATE TABLE test.check_query_comment_column ( first_column UInt8 COMMENT \'comment 1_2\', second_column UInt8 COMMENT \'comment 2_2\', third_column UInt8 COMMENT \'comment 3_2\') ENGINE = MergeTree() PARTITION BY second_column ORDER BY first_column SAMPLE BY first_column SETTINGS index_granularity = 8192 +CREATE TABLE test.check_query_comment_column ( first_column UInt8 COMMENT \'comment 1_3\', second_column UInt8 COMMENT \'comment 2_3\', third_column UInt8 COMMENT \'comment 3_3\') ENGINE = MergeTree() PARTITION BY second_column ORDER BY first_column SAMPLE BY first_column SETTINGS index_granularity = 8192 +┌─table──────────────────────┬─name──────────┬─comment─────┐ +│ check_query_comment_column │ first_column │ comment 1_3 │ +│ check_query_comment_column │ second_column │ comment 2_3 │ +│ check_query_comment_column │ third_column │ comment 3_3 │ +└────────────────────────────┴───────────────┴─────────────┘ diff --git a/dbms/tests/queries/0_stateless/00725_comment_columns.sql b/dbms/tests/queries/0_stateless/00725_comment_columns.sql new file mode 100644 index 0000000000..9766761698 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00725_comment_columns.sql @@ -0,0 +1,91 @@ +CREATE DATABASE IF NOT EXISTS test; +DROP TABLE IF EXISTS test.check_query_comment_column; + +-- Check COMMENT COLUMN and MODIFY COLUMN statements with simple engine +CREATE TABLE test.check_query_comment_column + ( + first_column UInt8 DEFAULT 1 
COMMENT 'comment 1', + second_column UInt8 MATERIALIZED first_column COMMENT 'comment 2', + third_column UInt8 ALIAS second_column COMMENT 'comment 3', + fourth_column UInt8 COMMENT 'comment 4', + fifth_column UInt8 + ) ENGINE = TinyLog; + +SHOW CREATE TABLE test.check_query_comment_column; +DESCRIBE TABLE test.check_query_comment_column; + +SELECT table, name, comment +FROM system.columns +WHERE table = 'check_query_comment_column' AND database = 'test' +FORMAT PrettyCompactNoEscapes; + +ALTER TABLE test.check_query_comment_column + COMMENT COLUMN first_column 'comment 1_1', + COMMENT COLUMN second_column 'comment 2_1', + COMMENT COLUMN third_column 'comment 3_1', + COMMENT COLUMN fourth_column 'comment 4_1', + COMMENT COLUMN fifth_column 'comment 5_1'; + +SHOW CREATE TABLE test.check_query_comment_column; + +ALTER TABLE test.check_query_comment_column + MODIFY COLUMN first_column COMMENT 'comment 1_2', + MODIFY COLUMN second_column COMMENT 'comment 2_2', + MODIFY COLUMN third_column COMMENT 'comment 3_2', + MODIFY COLUMN fourth_column COMMENT 'comment 4_2', + MODIFY COLUMN fifth_column COMMENT 'comment 5_2'; + +SELECT table, name, comment +FROM system.columns +WHERE table = 'check_query_comment_column' AND database = 'test' +FORMAT PrettyCompactNoEscapes; + +SHOW CREATE TABLE test.check_query_comment_column; +DROP TABLE IF EXISTS test.check_query_comment_column; + +-- Check `ALTER TABLE table_name COMMENT COLUMN 'comment'` statement with MergeTree engine +CREATE TABLE test.check_query_comment_column + ( + first_column UInt8 COMMENT 'comment 1', + second_column UInt8 COMMENT 'comment 2', + third_column UInt8 COMMENT 'comment 3' + ) ENGINE = MergeTree() + ORDER BY first_column + PARTITION BY second_column + SAMPLE BY first_column; + +SHOW CREATE TABLE test.check_query_comment_column; +DESCRIBE TABLE test.check_query_comment_column; + +SELECT table, name, comment +FROM system.columns +WHERE table = 'check_query_comment_column' AND database = 'test' +FORMAT PrettyCompactNoEscapes; + +ALTER TABLE test.check_query_comment_column + COMMENT COLUMN first_column 'comment 1_2', + COMMENT COLUMN second_column 'comment 2_2', + COMMENT COLUMN third_column 'comment 3_2'; + +SHOW CREATE TABLE test.check_query_comment_column; + +ALTER TABLE test.check_query_comment_column + MODIFY COLUMN first_column COMMENT 'comment 1_3', + MODIFY COLUMN second_column COMMENT 'comment 2_3', + MODIFY COLUMN third_column COMMENT 'comment 3_3'; + +SHOW CREATE TABLE test.check_query_comment_column; + +ALTER TABLE test.check_query_comment_column + MODIFY COLUMN first_column DEFAULT 1 COMMENT 'comment 1_3', + MODIFY COLUMN second_column COMMENT 'comment 2_3', -- We can't change default value of partition key. 
+ MODIFY COLUMN third_column DEFAULT 1 COMMENT 'comment 3_3'; + +SELECT table, name, comment +FROM system.columns +WHERE table = 'check_query_comment_column' and database = 'test' +FORMAT PrettyCompactNoEscapes; + +DROP TABLE IF EXISTS test.check_query_comment_column; + +-- TODO: add here tests with ReplicatedMergeTree diff --git a/dbms/tests/queries/0_stateless/00752_low_cardinality_lambda_argument.reference b/dbms/tests/queries/0_stateless/00752_low_cardinality_lambda_argument.reference new file mode 100644 index 0000000000..20076c05d5 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00752_low_cardinality_lambda_argument.reference @@ -0,0 +1,10 @@ +[] +[0] +[0] +[0,2] +[0,2] +[0,2,4] +[0,2,4] +[0,2,4,6] +[0,2,4,6] +[0,2,4,6,8] diff --git a/dbms/tests/queries/0_stateless/00752_low_cardinality_lambda_argument.sql b/dbms/tests/queries/0_stateless/00752_low_cardinality_lambda_argument.sql new file mode 100644 index 0000000000..eea080d7b9 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00752_low_cardinality_lambda_argument.sql @@ -0,0 +1,7 @@ +set allow_experimental_low_cardinality_type = 1; +drop table if exists test.lc_lambda; +create table test.lc_lambda (arr Array(LowCardinality(UInt64))) engine = Memory; +insert into test.lc_lambda select range(number) from system.numbers limit 10; +select arrayFilter(x -> x % 2 == 0, arr) from test.lc_lambda; +drop table if exists test.lc_lambda; + diff --git a/dbms/tests/queries/0_stateless/00753_comment_columns_zookeeper.reference b/dbms/tests/queries/0_stateless/00753_comment_columns_zookeeper.reference new file mode 100644 index 0000000000..3c344a523a --- /dev/null +++ b/dbms/tests/queries/0_stateless/00753_comment_columns_zookeeper.reference @@ -0,0 +1,4 @@ +CREATE TABLE test.check_comments ( column_name1 UInt8 DEFAULT 1 COMMENT \'comment\') ENGINE = ReplicatedMergeTree(\'clickhouse/tables/test_comments\', \'r1\') ORDER BY column_name1 SETTINGS index_granularity = 8192 +column_name1 UInt8 DEFAULT 1 comment +CREATE TABLE test.check_comments ( column_name1 UInt8 DEFAULT 1 COMMENT \'another comment\') ENGINE = ReplicatedMergeTree(\'clickhouse/tables/test_comments\', \'r1\') ORDER BY column_name1 SETTINGS index_granularity = 8192 +column_name1 UInt8 DEFAULT 1 another comment diff --git a/dbms/tests/queries/0_stateless/00753_comment_columns_zookeeper.sql b/dbms/tests/queries/0_stateless/00753_comment_columns_zookeeper.sql new file mode 100644 index 0000000000..e833e79b91 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00753_comment_columns_zookeeper.sql @@ -0,0 +1,19 @@ +DROP TABLE IF EXISTS test.check_comments; + +CREATE TABLE test.check_comments + ( + column_name1 UInt8 DEFAULT 1 COMMENT 'comment' + ) ENGINE = ReplicatedMergeTree('clickhouse/tables/test_comments', 'r1') + ORDER BY column_name1; + +SHOW CREATE test.check_comments; +DESC test.check_comments; + +ALTER TABLE test.check_comments COMMENT COLUMN column_name1 'another comment'; + +SHOW CREATE test.check_comments; +DESC test.check_comments; + +SELECT * FROM system.columns WHERE table = 'check.comments' and database = 'test'; + +DROP TABLE test.check_comments; diff --git a/dbms/tests/queries/0_stateless/00753_quantile_format.sql b/dbms/tests/queries/0_stateless/00753_quantile_format.sql index 9db4d51db1..d5345c346f 100644 --- a/dbms/tests/queries/0_stateless/00753_quantile_format.sql +++ b/dbms/tests/queries/0_stateless/00753_quantile_format.sql @@ -1,3 +1,5 @@ +DROP TABLE IF EXISTS test.datetime; + CREATE TABLE test.datetime (d DateTime) ENGINE = Memory; INSERT INTO test.datetime(d) 
VALUES(toDateTime('2016-06-15 23:00:00')); @@ -24,3 +26,5 @@ SELECT quantilesTDigest(0.2)(d) FROM test.datetime; SELECT quantileTDigestWeighted(0.2)(d, 1) FROM test.datetime; SELECT quantilesTDigestWeighted(0.2)(d, 1) FROM test.datetime; + +DROP TABLE test.datetime; diff --git a/dbms/tests/queries/0_stateless/00753_system_columns_and_system_tables.reference b/dbms/tests/queries/0_stateless/00753_system_columns_and_system_tables.reference new file mode 100644 index 0000000000..934c7ada99 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00753_system_columns_and_system_tables.reference @@ -0,0 +1,25 @@ +┌─name────────────────┬─primary_key─┬─order_key─┬─partition_key─┬─sample_key─┐ +│ check_system_tables │ name1 │ │ name2 │ name1 │ +└─────────────────────┴─────────────┴───────────┴───────────────┴────────────┘ +┌─name──┬─is_in_primary_key─┬─is_in_order_key─┬─is_in_partition_key─┬─is_in_sample_key─┐ +│ name1 │ 1 │ 1 │ 0 │ 1 │ +│ name2 │ 0 │ 0 │ 1 │ 0 │ +│ name3 │ 0 │ 0 │ 0 │ 0 │ +└───────┴───────────────────┴─────────────────┴─────────────────────┴──────────────────┘ +┌─name────────────────┬─primary_key─┬─order_key─┬─partition_key─┬─sample_key─┐ +│ check_system_tables │ date │ version │ date │ │ +└─────────────────────┴─────────────┴───────────┴───────────────┴────────────┘ +┌─name────┬─is_in_primary_key─┬─is_in_order_key─┬─is_in_partition_key─┬─is_in_sample_key─┐ +│ date │ 1 │ 1 │ 1 │ 0 │ +│ value │ 0 │ 0 │ 0 │ 0 │ +│ version │ 0 │ 1 │ 0 │ 0 │ +│ sign │ 0 │ 0 │ 0 │ 0 │ +└─────────┴───────────────────┴─────────────────┴─────────────────────┴──────────────────┘ +┌─name────────────────┬─primary_key───────────────────────┬─order_key─┬─partition_key───┬─sample_key────────┐ +│ check_system_tables │ Counter, Event, intHash32(UserId) │ │ toYYYYMM(Event) │ intHash32(UserId) │ +└─────────────────────┴───────────────────────────────────┴───────────┴─────────────────┴───────────────────┘ +┌─name────┬─is_in_primary_key─┬─is_in_order_key─┬─is_in_partition_key─┬─is_in_sample_key─┐ +│ Event │ 1 │ 1 │ 1 │ 0 │ +│ UserId │ 0 │ 0 │ 0 │ 1 │ +│ Counter │ 1 │ 1 │ 0 │ 0 │ +└─────────┴───────────────────┴─────────────────┴─────────────────────┴──────────────────┘ diff --git a/dbms/tests/queries/0_stateless/00753_system_columns_and_system_tables.sql b/dbms/tests/queries/0_stateless/00753_system_columns_and_system_tables.sql new file mode 100644 index 0000000000..8ceb5f881e --- /dev/null +++ b/dbms/tests/queries/0_stateless/00753_system_columns_and_system_tables.sql @@ -0,0 +1,68 @@ +CREATE DATABASE IF NOT EXISTS test; +DROP TABLE IF EXISTS test.check_system_tables; + +-- Check MergeTree declaration in new format +CREATE TABLE test.check_system_tables + ( + name1 UInt8, + name2 UInt8, + name3 UInt8 + ) ENGINE = MergeTree() + ORDER BY name1 + PARTITION BY name2 + SAMPLE BY name1; + +SELECT name, primary_key, order_key, partition_key, sample_key +FROM system.tables +WHERE name = 'check_system_tables' +FORMAT PrettyCompactNoEscapes; + +SELECT name, is_in_primary_key, is_in_order_key, is_in_partition_key, is_in_sample_key +FROM system.columns +WHERE table = 'check_system_tables' +FORMAT PrettyCompactNoEscapes; + +DROP TABLE IF EXISTS test.check_system_tables; + +-- Check VersionedCollapsingMergeTree +CREATE TABLE test.check_system_tables + ( + date Date, + value String, + version UInt64, + sign Int8 + ) ENGINE = VersionedCollapsingMergeTree(sign, version) + PARTITION BY date + ORDER BY date; + +SELECT name, primary_key, order_key, partition_key, sample_key +FROM system.tables +WHERE name = 'check_system_tables' +FORMAT 
PrettyCompactNoEscapes; + +SELECT name, is_in_primary_key, is_in_order_key, is_in_partition_key, is_in_sample_key +FROM system.columns +WHERE table = 'check_system_tables' +FORMAT PrettyCompactNoEscapes; + +DROP TABLE IF EXISTS test.check_system_tables; + +-- Check MergeTree declaration in old format +CREATE TABLE test.check_system_tables + ( + Event Date, + UserId UInt32, + Counter UInt32 + ) ENGINE = MergeTree(Event, intHash32(UserId), (Counter, Event, intHash32(UserId)), 8192); + +SELECT name, primary_key, order_key, partition_key, sample_key +FROM system.tables +WHERE name = 'check_system_tables' +FORMAT PrettyCompactNoEscapes; + +SELECT name, is_in_primary_key, is_in_order_key, is_in_partition_key, is_in_sample_key +FROM system.columns +WHERE table = 'check_system_tables' +FORMAT PrettyCompactNoEscapes; + +DROP TABLE IF EXISTS test.check_system_tables; diff --git a/dbms/tests/queries/0_stateless/00754_first_significant_subdomain_more.reference b/dbms/tests/queries/0_stateless/00754_first_significant_subdomain_more.reference new file mode 100644 index 0000000000..1163aa3ee2 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00754_first_significant_subdomain_more.reference @@ -0,0 +1,3 @@ +usa +pentagon +stanford diff --git a/dbms/tests/queries/0_stateless/00754_first_significant_subdomain_more.sql b/dbms/tests/queries/0_stateless/00754_first_significant_subdomain_more.sql new file mode 100644 index 0000000000..32a4c24fb3 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00754_first_significant_subdomain_more.sql @@ -0,0 +1 @@ +SELECT firstSignificantSubdomain(arrayJoin(['http://usa.gov.com/cgi-bin/yabb.pl?password=qwerty', 'https://www2.pentagon.mil.net/index.phtml', 'ftp://stanford.edu.org/~ivanov/phd-thesis.SHTM'])); diff --git a/dbms/tests/queries/0_stateless/00755_avg_value_size_hint_passing.reference b/dbms/tests/queries/0_stateless/00755_avg_value_size_hint_passing.reference new file mode 100644 index 0000000000..05af1594ea --- /dev/null +++ b/dbms/tests/queries/0_stateless/00755_avg_value_size_hint_passing.reference @@ -0,0 +1 @@ +10000 10000000 diff --git a/dbms/tests/queries/0_stateless/00755_avg_value_size_hint_passing.sql b/dbms/tests/queries/0_stateless/00755_avg_value_size_hint_passing.sql new file mode 100644 index 0000000000..c0551f2f7e --- /dev/null +++ b/dbms/tests/queries/0_stateless/00755_avg_value_size_hint_passing.sql @@ -0,0 +1,11 @@ +DROP TABLE IF EXISTS test.size_hint; +CREATE TABLE test.size_hint (s Array(String)) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 1000; + +SET max_block_size = 1000; +SET max_memory_usage = 1000000000; +INSERT INTO test.size_hint SELECT arrayMap(x -> 'Hello', range(1000)) FROM numbers(10000); + +SET max_memory_usage = 100000000, max_threads = 2; +SELECT count(), sum(length(s)) FROM test.size_hint; + +DROP TABLE test.size_hint; diff --git a/dbms/tests/server-test.xml b/dbms/tests/server-test.xml index cb9d63aa94..82b76f62fa 100644 --- a/dbms/tests/server-test.xml +++ b/dbms/tests/server-test.xml @@ -61,6 +61,20 @@ + + + + localhost + 59000 + + + + + localhost + 59000 + + + diff --git a/debian/.pbuilderrc b/debian/.pbuilderrc index bc12479eb1..2bc4fd7534 100644 --- a/debian/.pbuilderrc +++ b/debian/.pbuilderrc @@ -161,11 +161,11 @@ esac if [ "$ARCH" != arm64 ]; then case "$DIST" in - "unstable") + "experimental") EXTRAPACKAGES+=" liblld-8-dev libclang-8-dev llvm-8-dev liblld-8 " export CMAKE_FLAGS="-DLLVM_VERSION_POSTFIX=-8 $CMAKE_FLAGS" ;; - "cosmic" | "experimental" | "testing") + "cosmic" | "testing" | 
"unstable") EXTRAPACKAGES+=" liblld-7-dev libclang-7-dev llvm-7-dev liblld-7 " export CMAKE_FLAGS="-DLLVM_VERSION_POSTFIX=-7 $CMAKE_FLAGS" ;; diff --git a/debian/clickhouse-server.init b/debian/clickhouse-server.init index 92978c7225..0df35f5595 100755 --- a/debian/clickhouse-server.init +++ b/debian/clickhouse-server.init @@ -14,6 +14,7 @@ CLICKHOUSE_GROUP=${CLICKHOUSE_USER} SHELL=/bin/bash PROGRAM=clickhouse-server GENERIC_PROGRAM=clickhouse +CLICKHOUSE_PROGRAM_ENV="" EXTRACT_FROM_CONFIG=${GENERIC_PROGRAM}-extract-from-config SYSCONFDIR=/etc/$PROGRAM CLICKHOUSE_LOGDIR=/var/log/clickhouse-server @@ -168,7 +169,7 @@ start() if ! is_running; then # Lock should not be held while running child process, so we release the lock. Note: obviously, there is race condition. # But clickhouse-server has protection from simultaneous runs with same data directory. - su -s $SHELL ${CLICKHOUSE_USER} -c "$FLOCK -u 9; exec -a \"$PROGRAM\" \"$BINDIR/$PROGRAM\" --daemon --pid-file=\"$CLICKHOUSE_PIDFILE\" --config-file=\"$CLICKHOUSE_CONFIG\"" + su -s $SHELL ${CLICKHOUSE_USER} -c "$FLOCK -u 9; $CLICKHOUSE_PROGRAM_ENV exec -a \"$PROGRAM\" \"$BINDIR/$PROGRAM\" --daemon --pid-file=\"$CLICKHOUSE_PIDFILE\" --config-file=\"$CLICKHOUSE_CONFIG\"" EXIT_STATUS=$? if [ $EXIT_STATUS -ne 0 ]; then break diff --git a/docker/packager/deb/Dockerfile b/docker/packager/deb/Dockerfile index 9e0abd3afd..f7dfcc2513 100644 --- a/docker/packager/deb/Dockerfile +++ b/docker/packager/deb/Dockerfile @@ -12,17 +12,41 @@ RUN apt-get update -y \ gcc-8 \ g++-8 \ clang-6.0 \ - clang++-6.0 \ lld-6.0 \ libclang-6.0-dev \ - libicu-dev \ liblld-6.0-dev \ + llvm-6.0 \ + libllvm6.0 \ + llvm-6.0-dev \ + libicu-dev \ libreadline-dev \ ninja-build \ perl \ pkg-config \ devscripts \ debhelper \ - git + git \ + libc++abi-dev \ + libc++-dev \ + libboost-program-options-dev \ + libboost-system-dev \ + libboost-filesystem-dev \ + libboost-thread-dev \ + zlib1g-dev \ + liblz4-dev \ + libdouble-conversion-dev \ + libsparsehash-dev \ + librdkafka-dev \ + libpoconetssl50 \ + libpoco-dev \ + libsparsehash-dev \ + libgoogle-perftools-dev \ + libzstd-dev \ + libltdl-dev \ + libre2-dev \ + libjemalloc-dev \ + unixodbc-dev \ + odbcinst + CMD /bin/bash build/release --no-pbuilder && mv /*.deb /output diff --git a/docker/packager/packager b/docker/packager/packager index c0006c7b0b..38c4dc107c 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -52,7 +52,7 @@ def run_image_with_env(image_name, output, env_variables, ch_root): subprocess.check_call(cmd, shell=True) -def parse_env_variables(build_type, compiler, sanitizer, package_type, cache, distcc_hosts): +def parse_env_variables(build_type, compiler, sanitizer, package_type, cache, distcc_hosts, unbundled): result = [] if package_type == "deb": result.append("DEB_CC={}".format(compiler)) @@ -75,6 +75,8 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, cache, di elif cache == "distcc": result.append('DISTCC_HOSTS="{}"'.format("localhost/`nproc`")) + if unbundled: + result.append('CMAKE_FLAGS="-DUNBUNDLED=1 -DENABLE_MYSQL=0 -DENABLE_POCO_ODBC=0 -DENABLE_ODBC=0 $CMAKE_FLAGS"') return result if __name__ == "__main__": @@ -86,6 +88,7 @@ if __name__ == "__main__": parser.add_argument("--build-type", choices=("debug", ""), default="") parser.add_argument("--compiler", choices=("clang-6.0", "gcc-7", "gcc-8"), default="gcc-7") parser.add_argument("--sanitizer", choices=("address", "thread", "memory", "undefined", ""), default="") + parser.add_argument("--unbundled", 
action="store_true") parser.add_argument("--cache", choices=("", "ccache", "distcc"), default="") parser.add_argument("--distcc-hosts", nargs="+") parser.add_argument("--force-build-image", action="store_true") @@ -105,6 +108,6 @@ if __name__ == "__main__": if not check_image_exists_locally(image_name) or args.force_build_image: if not pull_image(image_name) or args.force_build_image: build_image(image_name, dockerfile) - env_prepared = parse_env_variables(args.build_type, args.compiler, args.sanitizer, args.package_type, args.cache, args.distcc_hosts) + env_prepared = parse_env_variables(args.build_type, args.compiler, args.sanitizer, args.package_type, args.cache, args.distcc_hosts, args.unbundled) run_image_with_env(image_name, args.output_dir, env_prepared, ch_root) logging.info("Output placed into {}".format(args.output_dir)) diff --git a/docker/test/integration/Dockerfile b/docker/test/integration/Dockerfile new file mode 100644 index 0000000000..e3237814fa --- /dev/null +++ b/docker/test/integration/Dockerfile @@ -0,0 +1,6 @@ +FROM ubuntu:18.04 + +RUN apt-get update && apt-get -y install tzdata + +ENV TZ=Europe/Moscow +RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index 5664a16078..9cd0a2ba97 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:18.04 +FROM yandex/clickhouse-deb-builder RUN apt-get update -y \ && env DEBIAN_FRONTEND=noninteractive \ diff --git a/docker/test/stress/Dockerfile b/docker/test/stress/Dockerfile index 8954c41efa..7987e04227 100644 --- a/docker/test/stress/Dockerfile +++ b/docker/test/stress/Dockerfile @@ -25,4 +25,4 @@ CMD dpkg -i package_folder/clickhouse-common-static_*.deb; \ dpkg -i package_folder/clickhouse-server_*.deb; \ dpkg -i package_folder/clickhouse-client_*.deb; \ dpkg -i package_folder/clickhouse-test_*.deb; \ - service clickhouse-server start && sleep 1 && ./stress --output-folder test_output \ No newline at end of file + service clickhouse-server start && sleep 1 && ./stress --output-folder test_output diff --git a/docs/en/interfaces/http_interface.md b/docs/en/interfaces/http_interface.md index e514f0c496..2d7b69f60f 100644 --- a/docs/en/interfaces/http_interface.md +++ b/docs/en/interfaces/http_interface.md @@ -1,3 +1,5 @@ + + # HTTP Interface The HTTP interface lets you use ClickHouse on any platform from any programming language. We use it for working from Java and Perl, as well as shell scripts. In other departments, the HTTP interface is used from Perl, Python, and Go. The HTTP interface is more limited than the native interface, but it has better compatibility. diff --git a/docs/en/operations/settings/permissions_for_queries.md b/docs/en/operations/settings/permissions_for_queries.md new file mode 100644 index 0000000000..8c731ed1ce --- /dev/null +++ b/docs/en/operations/settings/permissions_for_queries.md @@ -0,0 +1,53 @@ + + +# Permissions for queries + +Queries in ClickHouse can be divided into several groups: + +1. Read data queries: `SELECT`, `SHOW`, `DESCRIBE`, `EXISTS`. +1. Write data queries: `INSERT`, `OPTIMIZE`. +1. Change settings queries: `SET`, `USE`. +1. [DDL](https://en.wikipedia.org/wiki/Data_definition_language) queries: `CREATE`, `ALTER`, `RENAME`, `ATTACH`, `DETACH`, `DROP` `TRUNCATE`. +1. Particular queries: `KILL QUERY`. 
+ +The following settings regulate user permissions for the groups of queries: + +- [readonly](#settings_readonly) — Restricts permissions for all groups of queries except DDL. +- [allow_ddl](#settings_allow_ddl) — Restricts permissions for DDL queries. + +`KILL QUERY` can be performed with any settings. + + + +## readonly + +Restricts permissions for read data, write data and change settings queries. + +See [above](#permissions_for_queries) for the division of queries into groups. + +**Possible values** + +- 0 — All queries are allowed. Default value. +- 1 — Only read data queries are allowed. +- 2 — Read data and change settings queries are allowed. + +After setting `readonly = 1`, a user can't change the `readonly` and `allow_ddl` settings in the current session. + +When using the `GET` method in the [HTTP interface](../../interfaces/http_interface.md#http_interface), `readonly = 1` is set automatically. To modify data, use the `POST` method. + + + +## allow_ddl + +Allows or denies [DDL](https://en.wikipedia.org/wiki/Data_definition_language) queries. + +See [above](#permissions_for_queries) for the division of queries into groups. + +**Possible values** + +- 0 — DDL queries are not allowed. +- 1 — DDL queries are allowed. Default value. + +You cannot execute `SET allow_ddl = 1` if `allow_ddl = 0` for the current session. + +[Original article](https://clickhouse.yandex/docs/en/operations/settings/permissions_for_queries/) diff --git a/docs/en/operations/settings/query_complexity.md b/docs/en/operations/settings/query_complexity.md index 67418128ff..2b3bc45930 100644 --- a/docs/en/operations/settings/query_complexity.md +++ b/docs/en/operations/settings/query_complexity.md @@ -16,18 +16,6 @@ It can take one of two values: `throw` or `break`. Restrictions on aggregation ( `any (only for group_by_overflow_mode)` – Continuing aggregation for the keys that got into the set, but don't add new keys to the set. - - -## readonly - -With a value of 0, you can execute any queries. -With a value of 1, you can only execute read requests (such as SELECT and SHOW). Requests for writing and changing settings (INSERT, SET) are prohibited. -With a value of 2, you can process read queries (SELECT, SHOW) and change settings (SET). - -After enabling readonly mode, you can't disable it in the current session. - -When using the GET method in the HTTP interface, 'readonly = 1' is set automatically. In other words, for queries that modify data, you can only use the POST method. You can send the query itself either in the POST body, or in the URL parameter. - ## max_memory_usage diff --git a/docs/en/operations/tips.md b/docs/en/operations/tips.md index a34f79f869..bd058afb6d 100644 --- a/docs/en/operations/tips.md +++ b/docs/en/operations/tips.md @@ -35,6 +35,11 @@ For small amounts of data (up to \~200 GB compressed), it is best to use as much For large amounts of data and when processing interactive (online) queries, you should use a reasonable amount of RAM (128 GB or more) so the hot data subset will fit in the cache of pages. Even for data volumes of \~50 TB per server, using 128 GB of RAM significantly improves query performance compared to 64 GB. +Do not disable overcommit. The value of `cat /proc/sys/vm/overcommit_memory` should be 0 or 1. Run +``` +echo 0 | sudo tee /proc/sys/vm/overcommit_memory +``` + ## Swap File Always disable the swap file. The only reason for not doing this is if you are using ClickHouse on your personal laptop.
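The behavior described in the new `permissions_for_queries.md` page can be illustrated with a short client session. This is only a sketch: the table name `t` is a hypothetical placeholder, and the comments paraphrase the expected outcome.

```sql
SET readonly = 1;
SELECT count() FROM system.tables;         -- read data query: allowed
INSERT INTO t VALUES (1);                  -- write data query: rejected in readonly mode
SET readonly = 0;                          -- change settings query: rejected, readonly cannot be changed in this session

-- In a new session:
SET allow_ddl = 0;
CREATE TABLE t (x UInt8) ENGINE = Memory;  -- DDL query: rejected
SET allow_ddl = 1;                         -- also rejected while allow_ddl = 0
```

Over the HTTP interface the same rules apply implicitly: `GET` requests run with `readonly = 1`, so data can only be modified via `POST`.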
@@ -74,7 +79,7 @@ echo 4096 | sudo tee /sys/block/md2/md/stripe_cache_size Calculate the exact number from the number of devices and the block size, using the formula: `2 * num_devices * chunk_size_in_bytes / 4096`. -A block size of 1025 KB is sufficient for all RAID configurations. +A block size of 1024 KB is sufficient for all RAID configurations. Never set the block size too small or too large. You can use RAID-0 on SSD. diff --git a/docs/en/query_language/agg_functions/reference.md b/docs/en/query_language/agg_functions/reference.md index 711ce006be..c33d258d63 100644 --- a/docs/en/query_language/agg_functions/reference.md +++ b/docs/en/query_language/agg_functions/reference.md @@ -283,15 +283,17 @@ The result is determinate (it doesn't depend on the order of query processing). This function provides excellent accuracy even for data sets with extremely high cardinality (over 10 billion elements). It is recommended for default use. -## uniqCombined(x) +## uniqCombined(HLL_precision)(x) Calculates the approximate number of different values of the argument. Works for numbers, strings, dates, date-with-time, and for multiple arguments and tuple arguments. -A combination of three algorithms is used: array, hash table and [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog) with an error correction table. The memory consumption is several times smaller than for the `uniq` function, and the accuracy is several times higher. Performance is slightly lower than for the `uniq` function, but sometimes it can be even higher than it, such as with distributed queries that transmit a large number of aggregation states over the network. The maximum state size is 96 KiB (HyperLogLog of 217 6-bit cells). +A combination of three algorithms is used: array, hash table and [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog) with an error correction table. For a small number of distinct elements, the array is used. When the set grows larger, the hash table is used, as long as it is smaller than the HyperLogLog data structure. For an even larger number of elements, HyperLogLog is used, which occupies a fixed amount of memory. -The result is determinate (it doesn't depend on the order of query processing). +The parameter `HLL_precision` is the base-2 logarithm of the number of cells in HyperLogLog. You can omit the parameter (omit the first pair of parentheses). The default value is 17, which effectively amounts to 96 KiB of space (2^17 cells of 6 bits each). Memory consumption is several times smaller than for the `uniq` function, and accuracy is several times higher. Performance is slightly lower than for the `uniq` function, but it can sometimes even be higher, such as with distributed queries that transmit a large number of aggregation states over the network. -The `uniqCombined` function is a good default choice for calculating the number of different values, but keep in mind that the estimation error will increase for high-cardinality data sets (200M+ elements), and the function will return very inaccurate results for data sets with extremely high cardinality (1B+ elements). +The result is deterministic (it doesn't depend on the order of query processing). + +The `uniqCombined` function is a good default choice for calculating the number of different values, but keep in mind that for large sets (200 million elements and more) the estimation error will become larger than the theoretical value due to the poor choice of hash function.
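To make the new `uniqCombined(HLL_precision)(x)` syntax concrete, here is an illustrative pair of queries; the table `hits` and the column `UserID` are hypothetical placeholders.

```sql
-- Default precision: HLL_precision = 17, i.e. 2^17 cells and roughly 96 KiB of state.
SELECT uniqCombined(UserID) FROM hits;

-- Explicit precision: 2^12 cells, a smaller state at the cost of a larger estimation error.
SELECT uniqCombined(12)(UserID) FROM hits;
```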
## uniqHLL12(x) diff --git a/docs/en/query_language/functions/array_functions.md b/docs/en/query_language/functions/array_functions.md index 65c234935c..1a8858b8dc 100644 --- a/docs/en/query_language/functions/array_functions.md +++ b/docs/en/query_language/functions/array_functions.md @@ -55,7 +55,7 @@ arrayConcat(arrays) **Parameters** -- `arrays` – arbitrary number of arguments of type Array. +- `arrays` – Arbitrary number of arguments of [Array](../../data_types/array.md#data_type-array) type. **Example** diff --git a/docs/en/query_language/functions/string_functions.md b/docs/en/query_language/functions/string_functions.md index 251fbd53a9..29b8583624 100644 --- a/docs/en/query_language/functions/string_functions.md +++ b/docs/en/query_language/functions/string_functions.md @@ -74,5 +74,13 @@ If the 's' string is non-empty and does not contain the 'c' character at the end Returns the string 's' that was converted from the encoding in 'from' to the encoding in 'to'. +## base64Encode(s) +Encodes the string 's' into base64. -[Original article](https://clickhouse.yandex/docs/en/query_language/functions/string_functions/) +## base64Decode(s) +Decodes the base64-encoded string 's' into the original string. Raises an exception in case of failure. + +## tryBase64Decode(s) +Similar to base64Decode, but returns an empty string in case of error. + +[Original article](https://clickhouse.yandex/docs/en/query_language/functions/string_functions/) \ No newline at end of file diff --git a/docs/fa/operations/settings/permissions_for_queries.md b/docs/fa/operations/settings/permissions_for_queries.md new file mode 120000 index 0000000000..ce8473bf01 --- /dev/null +++ b/docs/fa/operations/settings/permissions_for_queries.md @@ -0,0 +1 @@ +../../../en/operations/settings/permissions_for_queries.md \ No newline at end of file diff --git a/docs/ru/operations/settings/permissions_for_queries.md b/docs/ru/operations/settings/permissions_for_queries.md new file mode 120000 index 0000000000..ce8473bf01 --- /dev/null +++ b/docs/ru/operations/settings/permissions_for_queries.md @@ -0,0 +1 @@ +../../../en/operations/settings/permissions_for_queries.md \ No newline at end of file diff --git a/docs/ru/operations/tips.md b/docs/ru/operations/tips.md index 4a6d8787a3..a66a760bc7 100644 --- a/docs/ru/operations/tips.md +++ b/docs/ru/operations/tips.md @@ -35,6 +35,11 @@ sudo echo 'performance' | tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_gover Для больших объемов данных, при выполнении интерактивных (онлайн) запросов, стоит использовать разумный объем оперативной памяти (128 Гб или более) для того, чтобы горячее подмножество данных поместилось в кеше страниц. Даже для объемов данных в \~50 Тб на сервер, использование 128 Гб оперативной памяти намного лучше для производительности выполнения запросов, чем 64 Гб. +Не выключайте overcommit. Значение `cat /proc/sys/vm/overcommit_memory` должно быть 0 или 1. Выполните: +``` +echo 0 | sudo tee /proc/sys/vm/overcommit_memory +``` + ## Файл подкачки Всегда отключайте файл подкачки. Единственной причиной этого не делать может быть только использование ClickHouse на личном ноутбуке.
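A few illustrative queries for the base64 functions documented above; the encoded value follows from the base64 algorithm itself, and the invalid input is a made-up example.

```sql
SELECT base64Encode('Hello');        -- 'SGVsbG8='
SELECT base64Decode('SGVsbG8=');     -- 'Hello'
SELECT base64Decode('####');         -- raises an exception: not valid base64
SELECT tryBase64Decode('####');      -- returns an empty string instead of throwing
```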
diff --git a/docs/ru/query_language/agg_functions/reference.md b/docs/ru/query_language/agg_functions/reference.md index d454df4919..557070e3ac 100644 --- a/docs/ru/query_language/agg_functions/reference.md +++ b/docs/ru/query_language/agg_functions/reference.md @@ -284,15 +284,17 @@ GROUP BY timeslot Функция обеспечивает высокую точность даже для множеств с высокой кардинальностью (более 10 миллиардов элементов). Рекомендуется для использования по умолчанию. -## uniqCombined(x) +## uniqCombined(HLL_precision)(x) Приближённо вычисляет количество различных значений аргумента. Работает для чисел, строк, дат, дат-с-временем, для нескольких аргументов и аргументов-кортежей. -Используется комбинация трех алгоритмов: массив, хэш-таблица и [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog) с таблицей коррекции погрешности. Расход памяти в несколько раз меньше, чем у функции `uniq`, а точность в несколько раз выше. Скорость работы чуть ниже, чем у функции `uniq`, но иногда может быть даже выше - в случае распределённых запросов, в которых по сети передаётся большое количество состояний агрегации. Максимальный размер состояния составляет 96 KiB (HyperLogLog из 217 6-битовых ячеек). +Используется комбинация трёх алгоритмов: массив, хэш-таблица и [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog) с таблицей коррекции погрешности. Для небольшого количества различных значений используется массив; при увеличении количества значений, используется хэш таблица, до тех пор, пока её размер меньше размера HyperLogLog структуры. При дальнейшем увеличении количества значений, используется HyperLogLog структура, имеющая фиксированный размер в памяти. + +Параметр HLL_precision - логарифм по основанию 2 от количества ячеек в HyperLogLog. Параметер можно не указывать (для этого, опустите первую пару скобок). По-умолчанию - 17. При использовании параметра по-умолчанию, расход памяти в несколько раз меньше, чем у функции `uniq`, а точность в несколько раз выше. Скорость работы чуть ниже, чем у функции `uniq`, но иногда может быть даже выше - в случае распределённых запросов, в которых по сети передаётся большое количество состояний агрегации. Каждая ячейка имеет размер 6 бит, что даёт 96 KiB для размера HyperLogLog структуры. Результат детерминирован (не зависит от порядка выполнения запроса). -Функция `uniqCombined` является хорошим выбором по умолчанию для подсчёта количества различных значений, но стоит иметь ввиду что для множеств большой кардинальности (200M+) ошибка оценки будет только расти и для множеств огромной кардинальности (1B+ элементов) функция возвращает результат с очень большой неточностью. +Функция `uniqCombined` является хорошим выбором по умолчанию для подсчёта количества различных значений, но стоит иметь ввиду что для множеств большой кардинальности (200 миллионов различных элементов и больше) ошибка оценки становится существенно больше расчётной из-за недостаточно хорошего выбора хэш-функции. ## uniqHLL12(x) diff --git a/docs/ru/query_language/functions/array_functions.md b/docs/ru/query_language/functions/array_functions.md index 1cbe686cef..654bf30864 100644 --- a/docs/ru/query_language/functions/array_functions.md +++ b/docs/ru/query_language/functions/array_functions.md @@ -55,7 +55,7 @@ arrayConcat(arrays) **Параметры** -- `arrays` - Перечисленные через запятую массивы `[values]`. +- `arrays` – произвольное количество элементов типа [Array][../../data_types/array.md#data_type-array]. 
**Пример** diff --git a/docs/ru/query_language/functions/string_functions.md b/docs/ru/query_language/functions/string_functions.md index a917fb4d91..9eb44859b3 100644 --- a/docs/ru/query_language/functions/string_functions.md +++ b/docs/ru/query_language/functions/string_functions.md @@ -59,4 +59,13 @@ ## convertCharset(s, from, to) Возвращает сконвертированную из кодировки from в кодировку to строку s. -[Оригинальная статья](https://clickhouse.yandex/docs/ru/query_language/functions/string_functions/) +## base64Encode(s) +Производит кодирование строки s в base64-представление. + +## base64Decode(s) +Декодирует base64-представление s в исходную строку. При невозможности декодирования выбрасывает исключение + +## tryBase64Decode(s) +Функционал аналогичен base64Decode, но при невозможности декодирования возвращает пустую строку. + +[Оригинальная статья](https://clickhouse.yandex/docs/ru/query_language/functions/string_functions/) \ No newline at end of file diff --git a/docs/toc_en.yml b/docs/toc_en.yml index e6e63095dd..ec4ae4e861 100644 --- a/docs/toc_en.yml +++ b/docs/toc_en.yml @@ -157,10 +157,10 @@ nav: - 'Server settings': 'operations/server_settings/settings.md' - 'Settings': - 'Introduction': 'operations/settings/index.md' + - 'Permissions for queries': 'operations/settings/permissions_for_queries.md' - 'Restrictions on query complexity': 'operations/settings/query_complexity.md' - 'Settings': 'operations/settings/settings.md' - 'Settings profiles': 'operations/settings/settings_profiles.md' - - 'Utilities': - 'Overview': 'operations/utils/index.md' - 'clickhouse-copier': 'operations/utils/clickhouse-copier.md' diff --git a/docs/toc_fa.yml b/docs/toc_fa.yml index 780d974793..bf10398ce8 100644 --- a/docs/toc_fa.yml +++ b/docs/toc_fa.yml @@ -154,6 +154,7 @@ nav: - 'Server settings': 'operations/server_settings/settings.md' - 'Settings': - 'Introduction': 'operations/settings/index.md' + - 'Permissions for queries': 'operations/settings/permissions_for_queries.md' - 'Restrictions on query complexity': 'operations/settings/query_complexity.md' - 'Settings': 'operations/settings/settings.md' - 'Settings profiles': 'operations/settings/settings_profiles.md' diff --git a/docs/toc_ru.yml b/docs/toc_ru.yml index 9c5ee2db74..00003aa935 100644 --- a/docs/toc_ru.yml +++ b/docs/toc_ru.yml @@ -158,6 +158,7 @@ nav: - 'Серверные настройки': 'operations/server_settings/settings.md' - 'Настройки': - 'Введение': 'operations/settings/index.md' + - 'Разрешения на выполнение запросов': 'operations/settings/permissions_for_queries.md' - 'Ограничения на сложность запроса': 'operations/settings/query_complexity.md' - 'Настройки': 'operations/settings/settings.md' - 'Профили настроек': 'operations/settings/settings_profiles.md' diff --git a/docs/toc_zh.yml b/docs/toc_zh.yml index 3ae4f5de3c..3b5a3d2756 100644 --- a/docs/toc_zh.yml +++ b/docs/toc_zh.yml @@ -157,6 +157,7 @@ nav: - 'Server settings': 'operations/server_settings/settings.md' - 'Settings': - 'Introduction': 'operations/settings/index.md' + - 'Permissions for queries': 'operations/settings/permissions_for_queries.md' - 'Restrictions on query complexity': 'operations/settings/query_complexity.md' - 'Settings': 'operations/settings/settings.md' - 'Settings profiles': 'operations/settings/settings_profiles.md' diff --git a/docs/tools/mkdocs-material-theme/assets/javascripts/application.js b/docs/tools/mkdocs-material-theme/assets/javascripts/application.js index 3ab61ed15c..12b8519472 100644 --- 
a/docs/tools/mkdocs-material-theme/assets/javascripts/application.js +++ b/docs/tools/mkdocs-material-theme/assets/javascripts/application.js @@ -4887,13 +4887,14 @@ var Result = function () { /* Append trailing wildcard to all terms for prefix querying */ .query(function (query) { _this.value_.toLowerCase().split(" ").filter(Boolean).forEach(function (term) { + term = _exposeLoaderLunrLunr2.default.stemmer(new _exposeLoaderLunrLunr2.default.Token(term)).toString(); for (var lang in _this.lang_) { lang = _this.lang_[lang]; if (typeof(_exposeLoaderLunrLunr2.default[lang]) !== 'undefined' && typeof(_exposeLoaderLunrLunr2.default[lang].stemmer) !== 'undefined') { term = _exposeLoaderLunrLunr2.default[lang].stemmer(new _exposeLoaderLunrLunr2.default.Token(term)).toString(); } } - query.term(term, { wildcard: _exposeLoaderLunrLunr2.default.Query.wildcard.TRAILING }); + query.term(term, { wildcard: _exposeLoaderLunrLunr2.default.Query.wildcard.TRAILING | _exposeLoaderLunrLunr2.default.Query.wildcard.LEADING }); }); }) diff --git a/docs/zh/operations/settings/permissions_for_queries.md b/docs/zh/operations/settings/permissions_for_queries.md new file mode 120000 index 0000000000..ce8473bf01 --- /dev/null +++ b/docs/zh/operations/settings/permissions_for_queries.md @@ -0,0 +1 @@ +../../../en/operations/settings/permissions_for_queries.md \ No newline at end of file diff --git a/libs/libcommon/CMakeLists.txt b/libs/libcommon/CMakeLists.txt index 6d341bee4c..2754ecb953 100644 --- a/libs/libcommon/CMakeLists.txt +++ b/libs/libcommon/CMakeLists.txt @@ -93,27 +93,31 @@ endif () find_package (Threads) target_include_directories (common BEFORE PRIVATE ${CCTZ_INCLUDE_DIR}) -target_include_directories (common BEFORE PUBLIC ${CITYHASH_INCLUDE_DIR}) target_include_directories (common PUBLIC ${COMMON_INCLUDE_DIR}) if (NOT USE_INTERNAL_BOOST_LIBRARY) target_include_directories (common BEFORE PUBLIC ${Boost_INCLUDE_DIRS}) endif () -target_link_libraries ( - common +target_link_libraries (common + PRIVATE pocoext + PUBLIC + ${Poco_Foundation_LIBRARY} ${CITYHASH_LIBRARIES} + PRIVATE ${CCTZ_LIBRARY} ${Boost_FILESYSTEM_LIBRARY} + PUBLIC ${Boost_SYSTEM_LIBRARY} + PRIVATE ${MALLOC_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT} ${GLIBC_COMPATIBILITY_LIBRARIES} ${MEMCPY_LIBRARIES}) if (RT_LIBRARY) - target_link_libraries (common ${RT_LIBRARY}) + target_link_libraries (common PRIVATE ${RT_LIBRARY}) endif () if (ENABLE_TESTS) diff --git a/libs/libcommon/cmake/find_jemalloc.cmake b/libs/libcommon/cmake/find_jemalloc.cmake index 35e2a13358..eb8c9cb6ac 100644 --- a/libs/libcommon/cmake/find_jemalloc.cmake +++ b/libs/libcommon/cmake/find_jemalloc.cmake @@ -25,7 +25,7 @@ if (ENABLE_JEMALLOC) if ((NOT JEMALLOC_LIBRARIES OR NOT JEMALLOC_INCLUDE_DIR) AND NOT MISSING_INTERNAL_JEMALLOC_LIBRARY) set (JEMALLOC_LIBRARIES "jemalloc") - set (JEMALLOC_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/jemalloc/include") + set (JEMALLOC_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/jemalloc-cmake/include" "${ClickHouse_SOURCE_DIR}/contrib/jemalloc-cmake/include_linux_x86_64") set (USE_INTERNAL_JEMALLOC_LIBRARY 1) endif () diff --git a/libs/libdaemon/CMakeLists.txt b/libs/libdaemon/CMakeLists.txt index 448c83148b..b352602f81 100644 --- a/libs/libdaemon/CMakeLists.txt +++ b/libs/libdaemon/CMakeLists.txt @@ -17,10 +17,10 @@ add_library (daemon ${LINK_MODE} if (USE_UNWIND) target_compile_definitions (daemon PRIVATE USE_UNWIND=1) target_include_directories (daemon BEFORE PRIVATE ${UNWIND_INCLUDE_DIR}) - target_link_libraries (daemon ${UNWIND_LIBRARY}) + 
target_link_libraries (daemon PRIVATE ${UNWIND_LIBRARY}) endif () target_include_directories (daemon PUBLIC include) target_include_directories (daemon PRIVATE ${ClickHouse_SOURCE_DIR}/libs/libpocoext/include) -target_link_libraries (daemon clickhouse_common_io clickhouse_common_config ${Poco_Util_LIBRARY} ${EXECINFO_LIBRARY} ${ELF_LIBRARY}) +target_link_libraries (daemon PRIVATE clickhouse_common_io clickhouse_common_config common ${Poco_Net_LIBRARY} ${Poco_Util_LIBRARY} ${EXECINFO_LIBRARY} ${ELF_LIBRARY}) diff --git a/libs/libdaemon/include/daemon/BaseDaemon.h b/libs/libdaemon/include/daemon/BaseDaemon.h index de0ce49685..65d2092732 100644 --- a/libs/libdaemon/include/daemon/BaseDaemon.h +++ b/libs/libdaemon/include/daemon/BaseDaemon.h @@ -27,34 +27,25 @@ namespace Poco { class TaskManager; } -/// \brief Базовый класс для демонов +/// \brief Base class for applications that can run as daemons. /// /// \code -/// # Список возможных опций командной строки обрабатываемых демоном: -/// # --config-file или --config - имя файла конфигурации. По умолчанию - config.xml -/// # --pid-file - имя PID файла. По умолчанию - pid -/// # --log-file - имя лог файла -/// # --errorlog-file - имя лог файла, в который будут помещаться только ошибки -/// # --daemon - запустить в режиме демона; если не указан - логгирование будет вестись на консоль -/// --daemon --config-file=localfile.xml --pid-file=pid.pid --log-file=log.log --errorlog-file=error.log +/// # Some possible command line options: +/// # --config-file, -C or --config - path to the configuration file. By default - config.xml in the current directory. +/// # --log-file +/// # --errorlog-file +/// # --daemon - run as a daemon; without this option, the program will be attached to the terminal and also output logs to stderr. +/// --daemon --config-file=localfile.xml --log-file=log.log --errorlog-file=error.log /// \endcode /// -/// Если неперехваченное исключение выкинуто в других потоках (не Task-и), то по-умолчанию -/// используется KillingErrorHandler, который вызывает std::terminate. -/// -/// Кроме того, класс позволяет достаточно гибко управлять журналированием. В методе initialize() вызывается метод -/// buildLoggers() который и строит нужные логгеры. Эта функция ожидает увидеть в конфигурации определённые теги -/// заключённые в секции "logger". -/// Если нужно журналирование на консоль, нужно просто не использовать тег "log" или использовать --console. -/// Теги уровней вывода использовать можно в любом случае - - +/// You can configure different log options for different loggers used inside the program +/// by providing subsections to "logger" in the configuration file. class BaseDaemon : public Poco::Util::ServerApplication { friend class SignalListener; public: - static constexpr char DEFAULT_GRAPHITE_CONFIG_NAME[] = "graphite"; + static inline constexpr char DEFAULT_GRAPHITE_CONFIG_NAME[] = "graphite"; BaseDaemon(); ~BaseDaemon() override; @@ -182,7 +173,7 @@ protected: std::unique_ptr task_manager; - /// Создание и автоматическое удаление pid файла. + /// RAII wrapper for pid file.
struct PID { std::string file; diff --git a/libs/libdaemon/src/BaseDaemon.cpp b/libs/libdaemon/src/BaseDaemon.cpp index 34297c937c..f65634f1e2 100644 --- a/libs/libdaemon/src/BaseDaemon.cpp +++ b/libs/libdaemon/src/BaseDaemon.cpp @@ -39,10 +39,8 @@ #include #include #include -#include #include #include -#include #include #include #include @@ -71,20 +69,6 @@ #include -using Poco::Logger; -using Poco::AutoPtr; -using Poco::Observer; -using Poco::FormattingChannel; -using Poco::SplitterChannel; -using Poco::ConsoleChannel; -using Poco::FileChannel; -using Poco::Path; -using Poco::Message; -using Poco::Util::AbstractConfiguration; - - -constexpr char BaseDaemon::DEFAULT_GRAPHITE_CONFIG_NAME[]; - /** For transferring information from signal handler to a separate thread. * If you need to do something serious in case of a signal (example: write a message to the log), * then sending information to a separate thread through pipe and doing all the stuff asynchronously @@ -109,7 +93,7 @@ struct Pipe write_fd = -1; if (0 != pipe(fds)) - DB::throwFromErrno("Cannot create pipe"); + DB::throwFromErrno("Cannot create pipe", 0); } void close() @@ -669,7 +653,7 @@ void BaseDaemon::buildLoggers(Poco::Util::AbstractConfiguration & config) std::cerr << "Logging " << log_level << " to " << log_path << std::endl; // Set up two channel chains. - log_file = new FileChannel; + log_file = new Poco::FileChannel; log_file->setProperty(Poco::FileChannel::PROP_PATH, Poco::Path(log_path).absolute().toString()); log_file->setProperty(Poco::FileChannel::PROP_ROTATION, config.getRawString("logger.size", "100M")); log_file->setProperty(Poco::FileChannel::PROP_ARCHIVE, "number"); @@ -691,7 +675,7 @@ void BaseDaemon::buildLoggers(Poco::Util::AbstractConfiguration & config) createDirectory(errorlog_path); std::cerr << "Logging errors to " << errorlog_path << std::endl; - error_log_file = new FileChannel; + error_log_file = new Poco::FileChannel; error_log_file->setProperty(Poco::FileChannel::PROP_PATH, Poco::Path(errorlog_path).absolute().toString()); error_log_file->setProperty(Poco::FileChannel::PROP_ROTATION, config.getRawString("logger.size", "100M")); error_log_file->setProperty(Poco::FileChannel::PROP_ARCHIVE, "number"); @@ -703,7 +687,7 @@ void BaseDaemon::buildLoggers(Poco::Util::AbstractConfiguration & config) Poco::AutoPtr pf = new OwnPatternFormatter(this); Poco::AutoPtr errorlog = new DB::OwnFormattingChannel(pf, error_log_file); - errorlog->setLevel(Message::PRIO_NOTICE); + errorlog->setLevel(Poco::Message::PRIO_NOTICE); errorlog->open(); split->addChannel(errorlog); } @@ -767,12 +751,12 @@ void BaseDaemon::buildLoggers(Poco::Util::AbstractConfiguration & config) Logger::root().setChannel(logger().getChannel()); // Explicitly specified log levels for specific loggers. - AbstractConfiguration::Keys levels; + Poco::Util::AbstractConfiguration::Keys levels; config.keys("logger.levels", levels); - if(!levels.empty()) - for(AbstractConfiguration::Keys::iterator it = levels.begin(); it != levels.end(); ++it) - Logger::get(*it).setLevel(config.getString("logger.levels." + *it, "trace")); + if (!levels.empty()) + for (const auto & level : levels) + Logger::get(level).setLevel(config.getString("logger.levels." + level, "trace")); } @@ -1077,7 +1061,7 @@ void BaseDaemon::logRevision() const /// Makes server shutdown if at least one Poco::Task have failed. 
void BaseDaemon::exitOnTaskError() { - Observer obs(*this, &BaseDaemon::handleNotification); + Poco::Observer obs(*this, &BaseDaemon::handleNotification); getTaskManager().addObserver(obs); } @@ -1085,7 +1069,7 @@ void BaseDaemon::exitOnTaskError() void BaseDaemon::handleNotification(Poco::TaskFailedNotification *_tfn) { task_failed = true; - AutoPtr fn(_tfn); + Poco::AutoPtr fn(_tfn); Logger *lg = &(logger()); LOG_ERROR(lg, "Task '" << fn->task()->name() << "' failed. Daemon is shutting down. Reason - " << fn->reason().displayText()); ServerApplication::terminate(); @@ -1205,7 +1189,7 @@ void BaseDaemon::handleSignal(int signal_id) onInterruptSignals(signal_id); } else - throw DB::Exception(std::string("Unsupported signal: ") + strsignal(signal_id)); + throw DB::Exception(std::string("Unsupported signal: ") + strsignal(signal_id), 0); } void BaseDaemon::onInterruptSignals(int signal_id) diff --git a/libs/libdaemon/src/ExtendedLogChannel.cpp b/libs/libdaemon/src/ExtendedLogChannel.cpp index 94413f8b8e..1f517cf5e9 100644 --- a/libs/libdaemon/src/ExtendedLogChannel.cpp +++ b/libs/libdaemon/src/ExtendedLogChannel.cpp @@ -8,13 +8,18 @@ namespace DB { +namespace ErrorCodes +{ + extern const int CANNOT_GETTIMEOFDAY; +} + ExtendedLogMessage ExtendedLogMessage::getFrom(const Poco::Message & base) { ExtendedLogMessage msg_ext(base); ::timeval tv; if (0 != gettimeofday(&tv, nullptr)) - DB::throwFromErrno("Cannot gettimeofday"); + DB::throwFromErrno("Cannot gettimeofday", ErrorCodes::CANNOT_GETTIMEOFDAY); msg_ext.time_seconds = static_cast(tv.tv_sec); msg_ext.time_microseconds = static_cast(tv.tv_usec); diff --git a/libs/libmysqlxx/CMakeLists.txt b/libs/libmysqlxx/CMakeLists.txt index fe7cc52224..a163d32511 100644 --- a/libs/libmysqlxx/CMakeLists.txt +++ b/libs/libmysqlxx/CMakeLists.txt @@ -42,20 +42,29 @@ else () find_library (ICONV_LIBRARY iconv) set (MYSQLCLIENT_LIBRARIES ${MYSQLCLIENT_LIBRARIES} ${STATIC_MYSQLCLIENT_LIB} ${ICONV_LIBRARY}) elseif (USE_STATIC_LIBRARIES AND STATIC_MYSQLCLIENT_LIB) + if (0) + # old lib patcher (ubuntu trusty?) 
set (MYSQLCLIENT_LIB ${CMAKE_CURRENT_BINARY_DIR}/libmysqlclient.a) + message(STATUS "will patch mysql lib ${STATIC_MYSQLCLIENT_LIB} => ${MYSQLCLIENT_LIB}") add_custom_command ( OUTPUT ${MYSQLCLIENT_LIB} COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/patch.sh ${STATIC_MYSQLCLIENT_LIB} ${MYSQLCLIENT_LIB} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + DEPENDS ${STATIC_MYSQLCLIENT_LIB} COMMENT "Patching mysqlclient library.") add_custom_target (our_mysql_client DEPENDS ${MYSQLCLIENT_LIB}) add_dependencies (mysqlxx our_mysql_client) set (MYSQLCLIENT_LIBRARIES ${MYSQLCLIENT_LIB}) + else() + set (MYSQLCLIENT_LIBRARIES ${STATIC_MYSQLCLIENT_LIB}) + endif() endif () endif () -target_link_libraries (mysqlxx common ${Poco_Util_LIBRARY} ${MYSQLCLIENT_LIBRARIES} ${OPENSSL_LIBRARIES} ${ZLIB_LIBRARIES} ${PLATFORM_LIBRARIES}) -target_include_directories (mysqlxx SYSTEM PRIVATE ${OPENSSL_INCLUDE_DIR}) +target_link_libraries (mysqlxx common ${Poco_Util_LIBRARY} ${MYSQLCLIENT_LIBRARIES} ${Boost_SYSTEM_LIBRARY} ${OPENSSL_LIBRARIES} ${ZLIB_LIBRARIES} ${PLATFORM_LIBRARIES}) +if (NOT USE_INTERNAL_MYSQL_LIBRARY) + target_include_directories (mysqlxx SYSTEM PRIVATE ${OPENSSL_INCLUDE_DIR}) +endif () if (ENABLE_TESTS) add_subdirectory (src/tests) diff --git a/utils/build/build_debian_unbundled.sh b/utils/build/build_debian_unbundled.sh index a78b0fc25d..2809e1f3cc 100755 --- a/utils/build/build_debian_unbundled.sh +++ b/utils/build/build_debian_unbundled.sh @@ -22,5 +22,5 @@ env TEST_RUN=1 \ `# Use all possible contrib libs from system` \ `# psmisc - killall` \ `# gdb - symbol test in pbuilder` \ - EXTRAPACKAGES="psmisc gdb clang-6.0 libc++abi-dev libc++-dev libboost-program-options-dev libboost-system-dev libboost-filesystem-dev libboost-thread-dev zlib1g-dev liblz4-dev libdouble-conversion-dev libsparsehash-dev librdkafka-dev libpoco-dev unixodbc-dev libsparsehash-dev libgoogle-perftools-dev libzstd-dev libre2-dev libunwind-dev googletest libcctz-dev libcapnp-dev libjemalloc-dev libssl-dev $EXTRAPACKAGES" \ + EXTRAPACKAGES="psmisc libboost-program-options-dev libboost-system-dev libboost-filesystem-dev libboost-thread-dev zlib1g-dev liblz4-dev libdouble-conversion-dev libsparsehash-dev librdkafka-dev libpoco-dev unixodbc-dev libsparsehash-dev libgoogle-perftools-dev libzstd-dev libre2-dev libunwind-dev googletest libcctz-dev libcapnp-dev libjemalloc-dev libssl-dev $EXTRAPACKAGES" \ pdebuild --configfile $ROOT_DIR/debian/.pbuilderrc $PDEBUILD_OPT diff --git a/utils/check-marks/CMakeLists.txt b/utils/check-marks/CMakeLists.txt index 5b858f929e..9c53436469 100644 --- a/utils/check-marks/CMakeLists.txt +++ b/utils/check-marks/CMakeLists.txt @@ -1,2 +1,2 @@ add_executable (check-marks main.cpp) -target_link_libraries (check-marks clickhouse_common_io ${Boost_PROGRAM_OPTIONS_LIBRARY}) +target_link_libraries(check-marks PRIVATE clickhouse_common_io ${Boost_PROGRAM_OPTIONS_LIBRARY}) diff --git a/utils/check-style/check-style b/utils/check-style/check-style index 53acaee55a..a79d24568d 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -16,5 +16,5 @@ ROOT_PATH=$(git rev-parse --show-toplevel) find $ROOT_PATH/dbms -name '*.h' -or -name '*.cpp' | grep -vP 'Compiler|build' | - xargs grep $@ -P '((class|struct|namespace|enum|if|for|while|else|throw|switch).*|\)(\s*const)?(\s*override)?\s*)\{$|\s$|\t|^ {1,3}[^\* ]\S|\t' -# a curly brace not in a new line, but not for the case of C++11 init or agg. 
initialization | trailing whitespace | number of ws not a multiple of 4, but not in the case of comment continuation | a tab character + xargs grep $@ -P '((class|struct|namespace|enum|if|for|while|else|throw|switch).*|\)(\s*const)?(\s*override)?\s*)\{$|\s$|\t|^ {1,3}[^\* ]\S|\t|^\s*(if|else if|if constexpr|else if constexpr|for|while|catch|switch)\(' +# a curly brace not in a new line, but not for the case of C++11 init or agg. initialization | trailing whitespace | number of ws not a multiple of 4, but not in the case of comment continuation | a tab character | missing whitespace after for/if/while... before opening brace diff --git a/utils/compressor/CMakeLists.txt b/utils/compressor/CMakeLists.txt index 20f7a82aa3..cd14005120 100644 --- a/utils/compressor/CMakeLists.txt +++ b/utils/compressor/CMakeLists.txt @@ -1,19 +1,19 @@ find_package (Threads) add_executable (util-clickhouse-compressor main.cpp) -target_link_libraries (util-clickhouse-compressor clickhouse-compressor-lib) +target_link_libraries (util-clickhouse-compressor PRIVATE clickhouse-compressor-lib) set_target_properties(util-clickhouse-compressor PROPERTIES OUTPUT_NAME "clickhouse-compressor") #install (TARGETS util-clickhouse-compressor RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse-compressor) add_executable (zstd_test zstd_test.cpp) -target_link_libraries (zstd_test ${ZSTD_LIBRARY} common Threads::Threads) +target_link_libraries (zstd_test PRIVATE ${ZSTD_LIBRARY} common Threads::Threads) add_executable (mutator mutator.cpp) -target_link_libraries (mutator clickhouse_common_io) +target_link_libraries(mutator PRIVATE clickhouse_common_io) add_executable (decompress_perf decompress_perf.cpp) -target_link_libraries (decompress_perf clickhouse_common_io) +target_link_libraries(decompress_perf PRIVATE clickhouse_common_io ${LZ4_LIBRARY}) if (NOT USE_INTERNAL_ZSTD_LIBRARY) target_include_directories (zstd_test BEFORE PRIVATE ${ZSTD_INCLUDE_DIR}) diff --git a/utils/config-processor/CMakeLists.txt b/utils/config-processor/CMakeLists.txt index d25c25d2f0..e7e15d0be5 100644 --- a/utils/config-processor/CMakeLists.txt +++ b/utils/config-processor/CMakeLists.txt @@ -1,4 +1,4 @@ add_executable (config-processor config-processor.cpp) -target_link_libraries (config-processor clickhouse_common_config) +target_link_libraries(config-processor PRIVATE clickhouse_common_config) INSTALL(TARGETS config-processor RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT config-processor) diff --git a/utils/corrector_utf8/CMakeLists.txt b/utils/corrector_utf8/CMakeLists.txt index d9c971a6cc..9114f3f58a 100644 --- a/utils/corrector_utf8/CMakeLists.txt +++ b/utils/corrector_utf8/CMakeLists.txt @@ -1,7 +1,6 @@ - add_executable(corrector_utf8 corrector_utf8.cpp) # Link the executable to the library. 
-target_link_libraries (corrector_utf8 clickhouse_common_io) +target_link_libraries(corrector_utf8 PRIVATE clickhouse_common_io) -install( TARGETS corrector_utf8 RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT corrector_utf8) +install(TARGETS corrector_utf8 RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT corrector_utf8) diff --git a/utils/fill-factor/CMakeLists.txt b/utils/fill-factor/CMakeLists.txt index fad359b63a..35a6712b11 100644 --- a/utils/fill-factor/CMakeLists.txt +++ b/utils/fill-factor/CMakeLists.txt @@ -1,2 +1,2 @@ add_executable (fill-factor main.cpp) -target_link_libraries (fill-factor clickhouse_common_io) +target_link_libraries(fill-factor PRIVATE clickhouse_common_io) diff --git a/utils/iotest/CMakeLists.txt b/utils/iotest/CMakeLists.txt index f690409a54..8f141b178f 100644 --- a/utils/iotest/CMakeLists.txt +++ b/utils/iotest/CMakeLists.txt @@ -1,9 +1,9 @@ add_executable (iotest iotest.cpp ${SRCS}) -target_link_libraries (iotest clickhouse_common_io) +target_link_libraries (iotest PRIVATE clickhouse_common_io) add_executable (iotest_nonblock iotest_nonblock.cpp ${SRCS}) -target_link_libraries (iotest_nonblock clickhouse_common_io) +target_link_libraries (iotest_nonblock PRIVATE clickhouse_common_io) add_executable (iotest_aio iotest_aio.cpp ${SRCS}) -target_link_libraries (iotest_aio clickhouse_common_io) +target_link_libraries (iotest_aio PRIVATE clickhouse_common_io) diff --git a/utils/iotest/iotest.cpp b/utils/iotest/iotest.cpp index c157e9736d..3134a49056 100644 --- a/utils/iotest/iotest.cpp +++ b/utils/iotest/iotest.cpp @@ -17,7 +17,16 @@ #include #include -using DB::throwFromErrno; +namespace DB +{ + namespace ErrorCodes + { + extern const int CANNOT_OPEN_FILE; + extern const int CANNOT_CLOSE_FILE; + extern const int CANNOT_READ_FROM_FILE_DESCRIPTOR; + extern const int CANNOT_WRITE_TO_FILE_DESCRIPTOR; + } +} enum Mode @@ -33,7 +42,9 @@ enum Mode void thread(int fd, int mode, size_t min_offset, size_t max_offset, size_t block_size, size_t count) { - DB::Memory direct_buf(block_size, sysconf(_SC_PAGESIZE)); + using namespace DB; + + Memory direct_buf(block_size, sysconf(_SC_PAGESIZE)); std::vector simple_buf(block_size); char * buf; @@ -60,12 +71,12 @@ void thread(int fd, int mode, size_t min_offset, size_t max_offset, size_t block if (mode & MODE_READ) { if (static_cast(block_size) != pread(fd, buf, block_size, offset)) - throwFromErrno("Cannot read"); + throwFromErrno("Cannot read", ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR); } else { if (static_cast(block_size) != pwrite(fd, buf, block_size, offset)) - throwFromErrno("Cannot write"); + throwFromErrno("Cannot write", ErrorCodes::CANNOT_WRITE_TO_FILE_DESCRIPTOR); } } } @@ -73,6 +84,8 @@ void thread(int fd, int mode, size_t min_offset, size_t max_offset, size_t block int mainImpl(int argc, char ** argv) { + using namespace DB; + const char * file_name = 0; int mode = MODE_NONE; UInt64 min_offset = 0; @@ -89,11 +102,11 @@ int mainImpl(int argc, char ** argv) } file_name = argv[1]; - min_offset = DB::parse(argv[3]); - max_offset = DB::parse(argv[4]); - block_size = DB::parse(argv[5]); - threads = DB::parse(argv[6]); - count = DB::parse(argv[7]); + min_offset = parse(argv[3]); + max_offset = parse(argv[4]); + block_size = parse(argv[5]); + threads = parse(argv[6]); + count = parse(argv[7]); for (int i = 0; argv[2][i]; ++i) { @@ -128,11 +141,11 @@ int mainImpl(int argc, char ** argv) int fd = open(file_name, ((mode & MODE_READ) ? O_RDONLY : O_WRONLY) | ((mode & MODE_SYNC) ? 
O_SYNC : 0)); #endif if (-1 == fd) - throwFromErrno("Cannot open file"); + throwFromErrno("Cannot open file", ErrorCodes::CANNOT_OPEN_FILE); #ifdef __APPLE__ if (mode & MODE_DIRECT) if (fcntl(fd, F_NOCACHE, 1) == -1) - throwFromErrno("Cannot open file"); + throwFromErrno("Cannot open file", ErrorCodes::CANNOT_CLOSE_FILE); #endif Stopwatch watch; @@ -145,7 +158,7 @@ int mainImpl(int argc, char ** argv) watch.stop(); if (0 != close(fd)) - throwFromErrno("Cannot close file"); + throwFromErrno("Cannot close file", ErrorCodes::CANNOT_CLOSE_FILE); std::cout << std::fixed << std::setprecision(2) << "Done " << count << " * " << threads << " ops"; diff --git a/utils/iotest/iotest_aio.cpp b/utils/iotest/iotest_aio.cpp index 0274541ec1..1dcb8ea2ae 100644 --- a/utils/iotest/iotest_aio.cpp +++ b/utils/iotest/iotest_aio.cpp @@ -29,7 +29,16 @@ int main(int argc, char ** argv) { return 0; } #include -using DB::throwFromErrno; +namespace DB +{ + namespace ErrorCodes + { + extern const int CANNOT_OPEN_FILE; + extern const int CANNOT_CLOSE_FILE; + extern const int CANNOT_IO_SUBMIT; + extern const int CANNOT_IO_GETEVENTS; + } +} enum Mode @@ -41,11 +50,13 @@ enum Mode void thread(int fd, int mode, size_t min_offset, size_t max_offset, size_t block_size, size_t buffers_count, size_t count) { + using namespace DB; + AIOContext ctx; - std::vector buffers(buffers_count); + std::vector buffers(buffers_count); for (size_t i = 0; i < buffers_count; ++i) - buffers[i] = DB::Memory(block_size, sysconf(_SC_PAGESIZE)); + buffers[i] = Memory(block_size, sysconf(_SC_PAGESIZE)); drand48_data rand_data; timespec times; @@ -109,13 +120,13 @@ void thread(int fd, int mode, size_t min_offset, size_t max_offset, size_t block /// Send queries. if (io_submit(ctx.ctx, query_cbs.size(), &query_cbs[0]) < 0) - throwFromErrno("io_submit failed"); + throwFromErrno("io_submit failed", ErrorCodes::CANNOT_IO_SUBMIT); /// Receive answers. If we have something else to send, then receive at least one answer (after that send them), otherwise wait all answers. memset(&events[0], 0, buffers_count * sizeof(events[0])); int evs = io_getevents(ctx.ctx, (blocks_sent < count ? 1 : in_progress), buffers_count, &events[0], nullptr); if (evs < 0) - throwFromErrno("io_getevents failed"); + throwFromErrno("io_getevents failed", ErrorCodes::CANNOT_IO_GETEVENTS); for (int i = 0; i < evs; ++i) { @@ -131,6 +142,8 @@ void thread(int fd, int mode, size_t min_offset, size_t max_offset, size_t block int mainImpl(int argc, char ** argv) { + using namespace DB; + const char * file_name = 0; int mode = MODE_READ; UInt64 min_offset = 0; @@ -149,16 +162,16 @@ int mainImpl(int argc, char ** argv) file_name = argv[1]; if (argv[2][0] == 'w') mode = MODE_WRITE; - min_offset = DB::parse(argv[3]); - max_offset = DB::parse(argv[4]); - block_size = DB::parse(argv[5]); - threads_count = DB::parse(argv[6]); - buffers_count = DB::parse(argv[7]); - count = DB::parse(argv[8]); + min_offset = parse(argv[3]); + max_offset = parse(argv[4]); + block_size = parse(argv[5]); + threads_count = parse(argv[6]); + buffers_count = parse(argv[7]); + count = parse(argv[8]); int fd = open(file_name, ((mode == MODE_READ) ? 
O_RDONLY : O_WRONLY) | O_DIRECT); if (-1 == fd) - throwFromErrno("Cannot open file"); + throwFromErrno("Cannot open file", ErrorCodes::CANNOT_OPEN_FILE); ThreadPool pool(threads_count); @@ -171,7 +184,7 @@ int mainImpl(int argc, char ** argv) watch.stop(); if (0 != close(fd)) - throwFromErrno("Cannot close file"); + throwFromErrno("Cannot close file", ErrorCodes::CANNOT_CLOSE_FILE); std::cout << std::fixed << std::setprecision(2) << "Done " << count << " * " << threads_count << " ops"; diff --git a/utils/iotest/iotest_nonblock.cpp b/utils/iotest/iotest_nonblock.cpp index 45ab62f60c..f85e8df91f 100644 --- a/utils/iotest/iotest_nonblock.cpp +++ b/utils/iotest/iotest_nonblock.cpp @@ -20,7 +20,18 @@ #include #include -using DB::throwFromErrno; +namespace DB +{ + namespace ErrorCodes + { + extern const int CANNOT_OPEN_FILE; + extern const int CANNOT_CLOSE_FILE; + extern const int CANNOT_READ_FROM_FILE_DESCRIPTOR; + extern const int CANNOT_WRITE_TO_FILE_DESCRIPTOR; + extern const int CANNOT_FSYNC; + extern const int SYSTEM_ERROR; + } +} enum Mode @@ -32,6 +43,8 @@ enum Mode int mainImpl(int argc, char ** argv) { + using namespace DB; + const char * file_name = 0; Mode mode = MODE_READ; UInt64 min_offset = 0; @@ -47,11 +60,11 @@ int mainImpl(int argc, char ** argv) } file_name = argv[1]; - min_offset = DB::parse(argv[3]); - max_offset = DB::parse(argv[4]); - block_size = DB::parse(argv[5]); - descriptors = DB::parse(argv[6]); - count = DB::parse(argv[7]); + min_offset = parse(argv[3]); + max_offset = parse(argv[4]); + block_size = parse(argv[5]); + descriptors = parse(argv[6]); + count = parse(argv[7]); if (!strcmp(argv[2], "r")) mode = MODE_READ; @@ -65,7 +78,7 @@ int mainImpl(int argc, char ** argv) { fds[i] = open(file_name, O_SYNC | ((mode == MODE_READ) ? 
O_RDONLY : O_WRONLY)); if (-1 == fds[i]) - throwFromErrno("Cannot open file"); + throwFromErrno("Cannot open file", ErrorCodes::CANNOT_OPEN_FILE); } std::vector buf(block_size); @@ -87,7 +100,7 @@ int mainImpl(int argc, char ** argv) while (ops < count) { if (poll(&polls[0], descriptors, -1) <= 0) - throwFromErrno("poll failed"); + throwFromErrno("poll failed", ErrorCodes::SYSTEM_ERROR); for (size_t i = 0; i < descriptors; ++i) { if (!polls[i].revents) @@ -109,12 +122,12 @@ int mainImpl(int argc, char ** argv) if (mode == MODE_READ) { if (static_cast(block_size) != pread(fds[i], &buf[0], block_size, offset)) - throwFromErrno("Cannot read"); + throwFromErrno("Cannot read", ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR); } else { if (static_cast(block_size) != pwrite(fds[i], &buf[0], block_size, offset)) - throwFromErrno("Cannot write"); + throwFromErrno("Cannot write", ErrorCodes::CANNOT_WRITE_TO_FILE_DESCRIPTOR); } } } @@ -122,7 +135,7 @@ int mainImpl(int argc, char ** argv) for (size_t i = 0; i < descriptors; ++i) { if (fsync(fds[i])) - throwFromErrno("Cannot fsync"); + throwFromErrno("Cannot fsync", ErrorCodes::CANNOT_FSYNC); } watch.stop(); @@ -130,7 +143,7 @@ int mainImpl(int argc, char ** argv) for (size_t i = 0; i < descriptors; ++i) { if (0 != close(fds[i])) - throwFromErrno("Cannot close file"); + throwFromErrno("Cannot close file", ErrorCodes::CANNOT_CLOSE_FILE); } std::cout << std::fixed << std::setprecision(2) diff --git a/utils/test-data-generator/CMakeLists.txt b/utils/test-data-generator/CMakeLists.txt index 9f1a0bf5a0..60f1789470 100644 --- a/utils/test-data-generator/CMakeLists.txt +++ b/utils/test-data-generator/CMakeLists.txt @@ -1,5 +1,5 @@ add_executable (test-data-generator main.cpp) -target_link_libraries (test-data-generator clickhouse_common_io ${Boost_PROGRAM_OPTIONS_LIBRARY}) +target_link_libraries(test-data-generator PRIVATE clickhouse_common_io ${Boost_PROGRAM_OPTIONS_LIBRARY}) add_executable (markov-model markov-model.cpp) -target_link_libraries (markov-model clickhouse_common_io ${Boost_PROGRAM_OPTIONS_LIBRARY}) +target_link_libraries(markov-model PRIVATE clickhouse_common_io ${Boost_PROGRAM_OPTIONS_LIBRARY}) diff --git a/utils/wikistat-loader/CMakeLists.txt b/utils/wikistat-loader/CMakeLists.txt index 84d00e91bd..7f72cbb9f4 100644 --- a/utils/wikistat-loader/CMakeLists.txt +++ b/utils/wikistat-loader/CMakeLists.txt @@ -1,2 +1,2 @@ add_executable (wikistat-loader main.cpp ${SRCS}) -target_link_libraries (wikistat-loader clickhouse_common_io ${Boost_PROGRAM_OPTIONS_LIBRARY}) +target_link_libraries (wikistat-loader PRIVATE clickhouse_common_io ${Boost_PROGRAM_OPTIONS_LIBRARY}) diff --git a/utils/zookeeper-cli/CMakeLists.txt b/utils/zookeeper-cli/CMakeLists.txt index 27b18bfc1f..89db7922ed 100644 --- a/utils/zookeeper-cli/CMakeLists.txt +++ b/utils/zookeeper-cli/CMakeLists.txt @@ -1,5 +1,5 @@ add_executable(clickhouse-zookeeper-cli zookeeper-cli.cpp) -target_link_libraries(clickhouse-zookeeper-cli clickhouse_common_zookeeper ${LINE_EDITING_LIBS}) +target_link_libraries(clickhouse-zookeeper-cli PRIVATE clickhouse_common_zookeeper ${LINE_EDITING_LIBS}) if (READLINE_INCLUDE_DIR) target_include_directories (clickhouse-zookeeper-cli SYSTEM PRIVATE ${READLINE_INCLUDE_DIR}) endif () diff --git a/utils/zookeeper-create-entry-to-download-part/CMakeLists.txt b/utils/zookeeper-create-entry-to-download-part/CMakeLists.txt index 4b59b838d6..34f2e608ef 100644 --- a/utils/zookeeper-create-entry-to-download-part/CMakeLists.txt +++ 
b/utils/zookeeper-create-entry-to-download-part/CMakeLists.txt @@ -1,2 +1,2 @@ add_executable (zookeeper-create-entry-to-download-part main.cpp ${SRCS}) -target_link_libraries (zookeeper-create-entry-to-download-part dbms ${Boost_PROGRAM_OPTIONS_LIBRARY}) +target_link_libraries (zookeeper-create-entry-to-download-part PRIVATE dbms clickhouse_common_zookeeper ${Boost_PROGRAM_OPTIONS_LIBRARY}) diff --git a/utils/zookeeper-dump-tree/CMakeLists.txt b/utils/zookeeper-dump-tree/CMakeLists.txt index ee28f42143..d2947fa893 100644 --- a/utils/zookeeper-dump-tree/CMakeLists.txt +++ b/utils/zookeeper-dump-tree/CMakeLists.txt @@ -1,2 +1,2 @@ add_executable (zookeeper-dump-tree main.cpp ${SRCS}) -target_link_libraries (zookeeper-dump-tree clickhouse_common_zookeeper ${Boost_PROGRAM_OPTIONS_LIBRARY}) +target_link_libraries(zookeeper-dump-tree PRIVATE clickhouse_common_zookeeper clickhouse_common_io ${Boost_PROGRAM_OPTIONS_LIBRARY}) diff --git a/utils/zookeeper-remove-by-list/CMakeLists.txt b/utils/zookeeper-remove-by-list/CMakeLists.txt index 494ef819f4..ba112bab9c 100644 --- a/utils/zookeeper-remove-by-list/CMakeLists.txt +++ b/utils/zookeeper-remove-by-list/CMakeLists.txt @@ -1,2 +1,2 @@ add_executable (zookeeper-remove-by-list main.cpp ${SRCS}) -target_link_libraries (zookeeper-remove-by-list clickhouse_common_zookeeper ${Boost_PROGRAM_OPTIONS_LIBRARY}) +target_link_libraries(zookeeper-remove-by-list PRIVATE clickhouse_common_zookeeper ${Boost_PROGRAM_OPTIONS_LIBRARY})