benchmark: updated log files for Vertica [#CONV-8419]

This commit is contained in:
Pavel Kartavyy 2013-08-29 11:56:37 +00:00
parent 8911c682e7
commit fc7a15a01d
5 changed files with 133 additions and 121 deletions

View File

@ -1,13 +1,14 @@
#!/bin/bash
# script to run query to databases
if [ "$#" != "2" ]; then
if [[ "$#" != "2" && "$#" != "3" ]]; then
echo "script to run request for database."
echo "usage: query_file expect_file"
echo "usage: query_file expect_file [etc_init_d_service]"
exit 1
fi
test_file=$1
expect_file=$2
etc_init_d_service=$3
TIMES=3
@ -45,9 +46,11 @@ function execute()
echo "query:" $query
expect -f $expect_file "$query"
if [ "$?" != "0" ]; then
echo "Error: $?"
#break
# Check the database service and restart it if its status command fails.
# The init script path is the optional third argument, so it may be empty.
# It must be quoted and tested for non-emptiness: unquoted and empty, the test
# becomes the one-argument form `[ -e ]`, which is always true and would lead
# to running `sudo status` / `sudo restart` with no script name.
if [ -n "$etc_init_d_service" ] && [ -e "$etc_init_d_service" ]; then
    # A non-zero exit status from `status` triggers the restart.
    if ! sudo "$etc_init_d_service" status; then
        sudo "$etc_init_d_service" restart
    fi
fi
done
fi
@ -57,4 +60,7 @@ function execute()
}
mapfile -t test_queries < $test_file
echo "start time: $(date)"
time execute "${test_queries[@]}"
echo "stop time: $(date)"

View File

@ -1,10 +1,15 @@
path=/opt/dump/dump_0.3
db_name=hits_10m
num=10000000
db_name=hits_1b
num=1000000000
dump_replaced=$path/dump_"$db_name"_replaced.tsv
dump_meshed=$path/dump_"$db_name"_meshed.tsv
clickhouse-client --query="SELECT toInt64(WatchID), JavaEnable, Title, GoodEvent, (EventTime < toDateTime('1971-01-01 00:00:00') ? toDateTime('1971-01-01 00:00:01') : EventTime), (EventDate < toDate('1971-01-01') ? toDate('1971-01-01') : EventDate), CounterID, ClientIP, RegionID, toInt64(UserID), CounterClass, OS, UserAgent, URL, Referer, Refresh, RefererCategoryID, RefererRegionID, URLCategoryID, URLRegionID, ResolutionWidth, ResolutionHeight, ResolutionDepth, FlashMajor, FlashMinor, FlashMinor2, NetMajor, NetMinor, UserAgentMajor, UserAgentMinor, CookieEnable, JavascriptEnable, IsMobile, MobilePhone, MobilePhoneModel, Params, IPNetworkID, TraficSourceID, SearchEngineID, SearchPhrase, AdvEngineID, IsArtifical, WindowClientWidth, WindowClientHeight, ClientTimeZone, (ClientEventTime < toDateTime('1971-01-01 00:00:01') ? toDateTime('1971-01-01 00:00:01') : ClientEventTime), SilverlightVersion1, SilverlightVersion2, SilverlightVersion3, SilverlightVersion4, PageCharset, CodeVersion, IsLink, IsDownload, IsNotBounce, toInt64(FUniqID), OriginalURL, HID, IsOldCounter, IsEvent, IsParameter, DontCountHits, WithHash, HitColor, (LocalEventTime < toDateTime('1971-01-01 00:00:01') ? toDateTime('1971-01-01 00:00:01') : LocalEventTime), Age, Sex, Income, Interests, Robotness, RemoteIP, WindowName, OpenerName, HistoryLength, BrowserLanguage, BrowserCountry, SocialNetwork, SocialAction, HTTPError, SendTiming, DNSTiming, ConnectTiming, ResponseStartTiming, ResponseEndTiming, FetchTiming, SocialSourceNetworkID, SocialSourcePage, ParamPrice, ParamOrderID, ParamCurrency, ParamCurrencyID, OpenstatServiceName, OpenstatCampaignID, OpenstatAdID, OpenstatSourceID, UTMSource, UTMMedium, UTMCampaign, UTMContent, UTMTerm, FromTag, HasGCLID, toInt64(RefererHash), toInt64(URLHash), CLID, toInt64(intHash32(UserID)) FROM hits_mt_test LIMIT $num FORMAT TabSeparated" > $dump_replaced
clickhouse-client --query="SET GLOBAL max_block_size=100000"
clickhouse-client --query="SET GLOBAL max_threads=1"
clickhouse-client --query="SELECT toInt64(WatchID), JavaEnable, Title, GoodEvent, (EventTime < toDateTime('1971-01-01 00:00:00') ? toDateTime('1971-01-01 00:00:01') : EventTime), (EventDate < toDate('1971-01-01') ? toDate('1971-01-01') : EventDate), CounterID, ClientIP, RegionID, toInt64(UserID), CounterClass, OS, UserAgent, URL, Referer, Refresh, RefererCategoryID, RefererRegionID, URLCategoryID, URLRegionID, ResolutionWidth, ResolutionHeight, ResolutionDepth, FlashMajor, FlashMinor, FlashMinor2, NetMajor, NetMinor, UserAgentMajor, UserAgentMinor, CookieEnable, JavascriptEnable, IsMobile, MobilePhone, MobilePhoneModel, Params, IPNetworkID, TraficSourceID, SearchEngineID, SearchPhrase, AdvEngineID, IsArtifical, WindowClientWidth, WindowClientHeight, ClientTimeZone, (ClientEventTime < toDateTime('1971-01-01 00:00:01') ? toDateTime('1971-01-01 00:00:01') : ClientEventTime), SilverlightVersion1, SilverlightVersion2, SilverlightVersion3, SilverlightVersion4, PageCharset, CodeVersion, IsLink, IsDownload, IsNotBounce, toInt64(FUniqID), OriginalURL, HID, IsOldCounter, IsEvent, IsParameter, DontCountHits, WithHash, HitColor, (LocalEventTime < toDateTime('1971-01-01 00:00:01') ? toDateTime('1971-01-01 00:00:01') : LocalEventTime), Age, Sex, Income, Interests, Robotness, RemoteIP, WindowName, OpenerName, HistoryLength, BrowserLanguage, BrowserCountry, SocialNetwork, SocialAction, HTTPError, SendTiming, DNSTiming, ConnectTiming, ResponseStartTiming, ResponseEndTiming, FetchTiming, SocialSourceNetworkID, SocialSourcePage, ParamPrice, ParamOrderID, ParamCurrency, ParamCurrencyID, OpenstatServiceName, OpenstatCampaignID, OpenstatAdID, OpenstatSourceID, UTMSource, UTMMedium, UTMCampaign, UTMContent, UTMTerm, FromTag, HasGCLID, toInt64(RefererHash), toInt64(URLHash), CLID, toInt64(intHash32(UserID)) FROM hits_mt_test_1b LIMIT $num FORMAT TabSeparated" > $dump_replaced
/etc/init.d/clickhouse-server-metrika-yandex-ulimit restart
sudo nsort -format=maximum_size:65535 -k1 -T /opt -o $dump_meshed $dump_replaced

View File

@ -2,7 +2,7 @@
#!/bin/expect
# Set timeout
set timeout 10
set timeout 600
# Get arguments
set query [lindex $argv 0]

View File

@ -1,8 +1,10 @@
\timing
create table hits_10m_meshed
(
WatchID INTEGER,
JavaEnable INTEGER,
Title VARCHAR(128),
Title VARCHAR(1024),
GoodEvent INTEGER,
EventTime DATETIME,
EventDate DATE,
@ -13,8 +15,8 @@ create table hits_10m_meshed
CounterClass INTEGER,
OS INTEGER,
UserAgent INTEGER,
URL VARCHAR(128),
Referer VARCHAR(128),
URL VARCHAR(6072),
Referer VARCHAR(2048),
Refresh INTEGER,
RefererCategoryID INTEGER,
RefererRegionID INTEGER,
@ -25,7 +27,7 @@ create table hits_10m_meshed
ResolutionDepth INTEGER,
FlashMajor INTEGER,
FlashMinor INTEGER,
FlashMinor2 VARCHAR(128),
FlashMinor2 VARCHAR(256),
NetMajor INTEGER,
NetMinor INTEGER,
UserAgentMajor INTEGER,
@ -34,12 +36,12 @@ create table hits_10m_meshed
JavascriptEnable INTEGER,
IsMobile INTEGER,
MobilePhone INTEGER,
MobilePhoneModel VARCHAR(128),
Params VARCHAR(128),
MobilePhoneModel VARCHAR(80),
Params VARCHAR(2048),
IPNetworkID INTEGER,
TraficSourceID INTEGER,
SearchEngineID INTEGER,
SearchPhrase VARCHAR(128),
SearchPhrase VARCHAR(1024),
AdvEngineID INTEGER,
IsArtifical INTEGER,
WindowClientWidth INTEGER,
@ -50,13 +52,13 @@ create table hits_10m_meshed
SilverlightVersion2 INTEGER,
SilverlightVersion3 INTEGER,
SilverlightVersion4 INTEGER,
PageCharset VARCHAR(128),
PageCharset VARCHAR(80),
CodeVersion INTEGER,
IsLink INTEGER,
IsDownload INTEGER,
IsNotBounce INTEGER,
FUniqID INTEGER,
OriginalURL VARCHAR(128),
OriginalURL VARCHAR(6072),
HID INTEGER,
IsOldCounter INTEGER,
IsEvent INTEGER,
@ -88,37 +90,37 @@ create table hits_10m_meshed
SocialSourceNetworkID INTEGER,
SocialSourcePage VARCHAR(128),
ParamPrice INTEGER,
ParamOrderID VARCHAR(128),
ParamOrderID VARCHAR(80),
ParamCurrency CHAR(3),
ParamCurrencyID INTEGER,
OpenstatServiceName VARCHAR(128),
OpenstatCampaignID VARCHAR(128),
OpenstatAdID VARCHAR(128),
OpenstatSourceID VARCHAR(128),
UTMSource VARCHAR(128),
UTMMedium VARCHAR(128),
UTMCampaign VARCHAR(128),
UTMContent VARCHAR(128),
UTMTerm VARCHAR(128),
FromTag VARCHAR(128),
OpenstatServiceName VARCHAR(80),
OpenstatCampaignID VARCHAR(80),
OpenstatAdID VARCHAR(80),
OpenstatSourceID VARCHAR(80),
UTMSource VARCHAR(256),
UTMMedium VARCHAR(256),
UTMCampaign VARCHAR(256),
UTMContent VARCHAR(256),
UTMTerm VARCHAR(256),
FromTag VARCHAR(256),
HasGCLID INTEGER,
RefererHash INTEGER,
URLHash INTEGER,
CLID INTEGER,
UserIDHash INTEGER
);
) ORDER BY CounterID, EventDate, UserIDHash, EventTime;
create projection if not exists hits_10m_meshed_p AS SELECT * FROM hits_10m_meshed ORDER BY CounterID, EventDate, UserIDHash, EventTime;
\set input_file '''/opt/dump/dump_0.3/dump_hits_10m_meshed.tsv'''
COPY hits_10m_meshed FROM :input_file DELIMITER E'\t' DIRECT;
create table hits_100m_meshed
(
WatchID INTEGER,
JavaEnable INTEGER,
Title VARCHAR(128),
Title VARCHAR(1024),
GoodEvent INTEGER,
EventTime DATETIME,
EventDate DATE,
@ -129,8 +131,8 @@ create table hits_100m_meshed
CounterClass INTEGER,
OS INTEGER,
UserAgent INTEGER,
URL VARCHAR(128),
Referer VARCHAR(128),
URL VARCHAR(6072),
Referer VARCHAR(2048),
Refresh INTEGER,
RefererCategoryID INTEGER,
RefererRegionID INTEGER,
@ -141,7 +143,7 @@ create table hits_100m_meshed
ResolutionDepth INTEGER,
FlashMajor INTEGER,
FlashMinor INTEGER,
FlashMinor2 VARCHAR(128),
FlashMinor2 VARCHAR(256),
NetMajor INTEGER,
NetMinor INTEGER,
UserAgentMajor INTEGER,
@ -150,12 +152,12 @@ create table hits_100m_meshed
JavascriptEnable INTEGER,
IsMobile INTEGER,
MobilePhone INTEGER,
MobilePhoneModel VARCHAR(128),
Params VARCHAR(128),
MobilePhoneModel VARCHAR(80),
Params VARCHAR(2048),
IPNetworkID INTEGER,
TraficSourceID INTEGER,
SearchEngineID INTEGER,
SearchPhrase VARCHAR(128),
SearchPhrase VARCHAR(1024),
AdvEngineID INTEGER,
IsArtifical INTEGER,
WindowClientWidth INTEGER,
@ -166,13 +168,13 @@ create table hits_100m_meshed
SilverlightVersion2 INTEGER,
SilverlightVersion3 INTEGER,
SilverlightVersion4 INTEGER,
PageCharset VARCHAR(128),
PageCharset VARCHAR(80),
CodeVersion INTEGER,
IsLink INTEGER,
IsDownload INTEGER,
IsNotBounce INTEGER,
FUniqID INTEGER,
OriginalURL VARCHAR(128),
OriginalURL VARCHAR(6072),
HID INTEGER,
IsOldCounter INTEGER,
IsEvent INTEGER,
@ -204,36 +206,37 @@ create table hits_100m_meshed
SocialSourceNetworkID INTEGER,
SocialSourcePage VARCHAR(128),
ParamPrice INTEGER,
ParamOrderID VARCHAR(128),
ParamOrderID VARCHAR(80),
ParamCurrency CHAR(3),
ParamCurrencyID INTEGER,
OpenstatServiceName VARCHAR(128),
OpenstatCampaignID VARCHAR(128),
OpenstatAdID VARCHAR(128),
OpenstatSourceID VARCHAR(128),
UTMSource VARCHAR(128),
UTMMedium VARCHAR(128),
UTMCampaign VARCHAR(128),
UTMContent VARCHAR(128),
UTMTerm VARCHAR(128),
FromTag VARCHAR(128),
OpenstatServiceName VARCHAR(80),
OpenstatCampaignID VARCHAR(80),
OpenstatAdID VARCHAR(80),
OpenstatSourceID VARCHAR(80),
UTMSource VARCHAR(256),
UTMMedium VARCHAR(256),
UTMCampaign VARCHAR(256),
UTMContent VARCHAR(256),
UTMTerm VARCHAR(256),
FromTag VARCHAR(256),
HasGCLID INTEGER,
RefererHash INTEGER,
URLHash INTEGER,
CLID INTEGER,
UserIDHash INTEGER
);
create projection if not exists hits_100m_meshed_p AS SELECT * FROM hits_100m_meshed ORDER BY CounterID, EventDate, UserIDHash, EventTime;
-- Table-level sort order; same clause and single terminator as the hits_10m_meshed and hits_1b_meshed definitions.
) ORDER BY CounterID, EventDate, UserIDHash, EventTime;
\set input_file '''/opt/dump/dump_0.3/dump_hits_100m_meshed.tsv'''
COPY hits_10m_meshed FROM :input_file DELIMITER E'\t' DIRECT;
COPY hits_100m_meshed FROM :input_file DELIMITER E'\t' DIRECT;
create table hits_1b_meshed
(
WatchID INTEGER,
JavaEnable INTEGER,
Title VARCHAR(128),
Title VARCHAR(1024),
GoodEvent INTEGER,
EventTime DATETIME,
EventDate DATE,
@ -244,8 +247,8 @@ create table hits_1b_meshed
CounterClass INTEGER,
OS INTEGER,
UserAgent INTEGER,
URL VARCHAR(128),
Referer VARCHAR(128),
URL VARCHAR(6072),
Referer VARCHAR(2048),
Refresh INTEGER,
RefererCategoryID INTEGER,
RefererRegionID INTEGER,
@ -256,7 +259,7 @@ create table hits_1b_meshed
ResolutionDepth INTEGER,
FlashMajor INTEGER,
FlashMinor INTEGER,
FlashMinor2 VARCHAR(128),
FlashMinor2 VARCHAR(256),
NetMajor INTEGER,
NetMinor INTEGER,
UserAgentMajor INTEGER,
@ -265,12 +268,12 @@ create table hits_1b_meshed
JavascriptEnable INTEGER,
IsMobile INTEGER,
MobilePhone INTEGER,
MobilePhoneModel VARCHAR(128),
Params VARCHAR(128),
MobilePhoneModel VARCHAR(80),
Params VARCHAR(2048),
IPNetworkID INTEGER,
TraficSourceID INTEGER,
SearchEngineID INTEGER,
SearchPhrase VARCHAR(128),
SearchPhrase VARCHAR(1024),
AdvEngineID INTEGER,
IsArtifical INTEGER,
WindowClientWidth INTEGER,
@ -281,13 +284,13 @@ create table hits_1b_meshed
SilverlightVersion2 INTEGER,
SilverlightVersion3 INTEGER,
SilverlightVersion4 INTEGER,
PageCharset VARCHAR(128),
PageCharset VARCHAR(80),
CodeVersion INTEGER,
IsLink INTEGER,
IsDownload INTEGER,
IsNotBounce INTEGER,
FUniqID INTEGER,
OriginalURL VARCHAR(128),
OriginalURL VARCHAR(6072),
HID INTEGER,
IsOldCounter INTEGER,
IsEvent INTEGER,
@ -319,28 +322,26 @@ create table hits_1b_meshed
SocialSourceNetworkID INTEGER,
SocialSourcePage VARCHAR(128),
ParamPrice INTEGER,
ParamOrderID VARCHAR(128),
ParamOrderID VARCHAR(80),
ParamCurrency CHAR(3),
ParamCurrencyID INTEGER,
OpenstatServiceName VARCHAR(128),
OpenstatCampaignID VARCHAR(128),
OpenstatAdID VARCHAR(128),
OpenstatSourceID VARCHAR(128),
UTMSource VARCHAR(128),
UTMMedium VARCHAR(128),
UTMCampaign VARCHAR(128),
UTMContent VARCHAR(128),
UTMTerm VARCHAR(128),
FromTag VARCHAR(128),
OpenstatServiceName VARCHAR(80),
OpenstatCampaignID VARCHAR(80),
OpenstatAdID VARCHAR(80),
OpenstatSourceID VARCHAR(80),
UTMSource VARCHAR(256),
UTMMedium VARCHAR(256),
UTMCampaign VARCHAR(256),
UTMContent VARCHAR(256),
UTMTerm VARCHAR(256),
FromTag VARCHAR(256),
HasGCLID INTEGER,
RefererHash INTEGER,
URLHash INTEGER,
CLID INTEGER,
UserIDHash INTEGER
);
) ORDER BY CounterID, EventDate, UserIDHash, EventTime;
create projection if not exists hits_1b_meshed_p AS SELECT * FROM hits_1b_meshed ORDER BY CounterID, EventDate, UserIDHash, EventTime;
\set input_file '''/opt/dump/dump_0.3/dump_hits_1b_meshed.tsv'''
COPY hits_1b_meshed FROM :input_file DELIMITER E'\t' DIRECT;

View File

@ -1,111 +1,111 @@
SELECT count(*) FROM hits_10m_meshed_p;
SELECT count(*) FROM hits_10m_meshed_p WHERE AdvEngineID != 0;
SELECT sum(AdvEngineID), count(*), avg(ResolutionWidth) FROM hits_10m_meshed_p;
SELECT sum_float(UserID) FROM hits_10m_meshed_p;
SELECT count(DISTINCT UserID) FROM hits_10m_meshed_p;
SELECT count(DISTINCT SearchPhrase) FROM hits_10m_meshed_p;
SELECT min(EventDate), max(EventDate) FROM hits_10m_meshed_p;
SELECT count(*) FROM hits_100m_meshed;
SELECT count(*) FROM hits_100m_meshed WHERE AdvEngineID != 0;
SELECT sum(AdvEngineID), count(*), avg(ResolutionWidth) FROM hits_100m_meshed;
SELECT sum_float(UserID) FROM hits_100m_meshed;
SELECT count(DISTINCT UserID) FROM hits_100m_meshed;
SELECT count(DISTINCT SearchPhrase) FROM hits_100m_meshed;
SELECT min(EventDate), max(EventDate) FROM hits_100m_meshed;
SELECT AdvEngineID, count(*) FROM hits_10m_meshed_p WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY count(*) DESC;
SELECT AdvEngineID, count(*) FROM hits_100m_meshed WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY count(*) DESC;
-- мощная фильтрация. После фильтрации почти ничего не остаётся, но делаем ещё агрегацию.;
SELECT RegionID, count(DISTINCT UserID) AS u FROM hits_10m_meshed_p GROUP BY RegionID ORDER BY u DESC LIMIT 10;
SELECT RegionID, count(DISTINCT UserID) AS u FROM hits_100m_meshed GROUP BY RegionID ORDER BY u DESC LIMIT 10;
-- агрегация, среднее количество ключей.;
SELECT RegionID, sum(AdvEngineID), count(*) AS c, avg(ResolutionWidth), count(DISTINCT UserID) FROM hits_10m_meshed_p GROUP BY RegionID ORDER BY count(*) DESC LIMIT 10;
SELECT RegionID, sum(AdvEngineID), count(*) AS c, avg(ResolutionWidth), count(DISTINCT UserID) FROM hits_100m_meshed GROUP BY RegionID ORDER BY count(*) DESC LIMIT 10;
-- агрегация, среднее количество ключей, несколько агрегатных функций.;
SELECT MobilePhoneModel, count(DISTINCT UserID) AS u FROM hits_10m_meshed_p WHERE MobilePhoneModel != '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
SELECT MobilePhoneModel, count(DISTINCT UserID) AS u FROM hits_100m_meshed WHERE MobilePhoneModel != '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
-- мощная фильтрация по строкам, затем агрегация по строкам.;
SELECT MobilePhone, MobilePhoneModel, count(DISTINCT UserID) AS u FROM hits_10m_meshed_p WHERE MobilePhoneModel != '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
SELECT MobilePhone, MobilePhoneModel, count(DISTINCT UserID) AS u FROM hits_100m_meshed WHERE MobilePhoneModel != '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
-- мощная фильтрация по строкам, затем агрегация по паре из числа и строки.;
SELECT SearchPhrase, count(*) FROM hits_10m_meshed_p WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY count(*) DESC LIMIT 10;
SELECT SearchPhrase, count(*) FROM hits_100m_meshed WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY count(*) DESC LIMIT 10;
-- средняя фильтрация по строкам, затем агрегация по строкам, большое количество ключей.;
SELECT SearchPhrase, count(DISTINCT UserID) AS u FROM hits_10m_meshed_p WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
SELECT SearchPhrase, count(DISTINCT UserID) AS u FROM hits_100m_meshed WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
-- агрегация чуть сложнее.;
SELECT SearchEngineID, SearchPhrase, count(*) FROM hits_10m_meshed_p WHERE SearchPhrase != '' GROUP BY SearchEngineID, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
SELECT SearchEngineID, SearchPhrase, count(*) FROM hits_100m_meshed WHERE SearchPhrase != '' GROUP BY SearchEngineID, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
-- агрегация по числу и строке, большое количество ключей.;
SELECT UserID, count(*) FROM hits_10m_meshed_p GROUP BY UserID ORDER BY count(*) DESC LIMIT 10;
SELECT UserID, count(*) FROM hits_100m_meshed GROUP BY UserID ORDER BY count(*) DESC LIMIT 10;
-- агрегация по очень большому количеству ключей, может не хватить оперативки.;
SELECT UserID, SearchPhrase, count(*) FROM hits_10m_meshed_p GROUP BY UserID, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
SELECT UserID, SearchPhrase, count(*) FROM hits_100m_meshed GROUP BY UserID, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
-- ещё более сложная агрегация.;
SELECT UserID, SearchPhrase, count(*) FROM hits_10m_meshed_p GROUP BY UserID, SearchPhrase LIMIT 10;
SELECT UserID, SearchPhrase, count(*) FROM hits_100m_meshed GROUP BY UserID, SearchPhrase LIMIT 10;
-- то же самое, но без сортировки.;
SELECT UserID, Minute(EventTime) AS m, SearchPhrase, count(*) FROM hits_10m_meshed_p GROUP BY UserID, m, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
SELECT UserID, Minute(EventTime) AS m, SearchPhrase, count(*) FROM hits_100m_meshed GROUP BY UserID, m, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
-- ещё более сложная агрегация, не стоит выполнять на больших таблицах.;
SELECT UserID FROM hits_10m_meshed_p WHERE UserID = 12345678901234567890;
SELECT UserID FROM hits_100m_meshed WHERE UserID = 12345678901234567890;
-- мощная фильтрация по столбцу типа UInt64.;
SELECT count(*) FROM hits_10m_meshed_p WHERE URL LIKE '%metrika%';
SELECT count(*) FROM hits_100m_meshed WHERE URL LIKE '%metrika%';
-- фильтрация по поиску подстроки в строке.;
SELECT SearchPhrase, MAX(URL), count(*) FROM hits_10m_meshed_p WHERE URL LIKE '%metrika%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY count(*) DESC LIMIT 10;
SELECT SearchPhrase, MAX(URL), count(*) FROM hits_100m_meshed WHERE URL LIKE '%metrika%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY count(*) DESC LIMIT 10;
-- вынимаем большие столбцы, фильтрация по строке.;
SELECT SearchPhrase, MAX(URL), MAX(Title), count(*) AS c, count(DISTINCT UserID) FROM hits_10m_meshed_p WHERE Title LIKE '%Яндекс%' AND URL NOT LIKE '%.yandex.%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY count(*) DESC LIMIT 10;
SELECT SearchPhrase, MAX(URL), MAX(Title), count(*) AS c, count(DISTINCT UserID) FROM hits_100m_meshed WHERE Title LIKE '%Яндекс%' AND URL NOT LIKE '%.yandex.%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY count(*) DESC LIMIT 10;
-- чуть больше столбцы.;
SELECT * FROM hits_10m_meshed_p WHERE URL LIKE '%metrika%' ORDER BY EventTime LIMIT 10;
SELECT * FROM hits_100m_meshed WHERE URL LIKE '%metrika%' ORDER BY EventTime LIMIT 10;
-- плохой запрос - вынимаем все столбцы.;
SELECT SearchPhrase FROM hits_10m_meshed_p WHERE SearchPhrase != '' ORDER BY EventTime LIMIT 10;
SELECT SearchPhrase FROM hits_100m_meshed WHERE SearchPhrase != '' ORDER BY EventTime LIMIT 10;
-- большая сортировка.;
SELECT SearchPhrase FROM hits_10m_meshed_p WHERE SearchPhrase != '' ORDER BY SearchPhrase LIMIT 10;
SELECT SearchPhrase FROM hits_100m_meshed WHERE SearchPhrase != '' ORDER BY SearchPhrase LIMIT 10;
-- большая сортировка по строкам.;
SELECT SearchPhrase FROM hits_10m_meshed_p WHERE SearchPhrase != '' ORDER BY EventTime, SearchPhrase LIMIT 10;
SELECT SearchPhrase FROM hits_100m_meshed WHERE SearchPhrase != '' ORDER BY EventTime, SearchPhrase LIMIT 10;
-- большая сортировка по кортежу.;
SELECT CounterID, avg(length(URL)) AS l, count(*) FROM hits_10m_meshed_p WHERE URL != '' GROUP BY CounterID HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
SELECT CounterID, avg(length(URL)) AS l, count(*) FROM hits_100m_meshed WHERE URL != '' GROUP BY CounterID HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
-- считаем средние длины URL для крупных счётчиков.;
SELECT REGEXP_SUBSTR(Referer, '^((https?|ftp)://|(www|ftp)\.)([a-z0-9-]+(\.[a-z0-9-]+)+)([/?].*)?$', 1, 1, 'i', 4) AS key, avg(length(Referer)) AS l, count(*) AS c, MAX(Referer) FROM hits_10m_meshed_p WHERE Referer != '' AND isUTF8(Referer) GROUP BY key HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
SELECT SUBSTRING(SUBSTRING(Referer, POSITION('//' IN Referer) + 2), 1, GREATEST(0, POSITION('/' IN SUBSTRING(Referer, POSITION('//' IN Referer) + 2)) - 1)) AS key, avg(length(Referer)) AS l, count(*) AS c, MAX(Referer) FROM hits_100m_meshed WHERE Referer != '' GROUP BY key HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
-- то же самое, но с разбивкой по доменам.;
SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), 
sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM hits_10m_meshed_p;
SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), 
sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM hits_100m_meshed;
-- много тупых агрегатных функций.;
SELECT SearchEngineID, ClientIP, count(*) AS c, sum(Refresh), avg(ResolutionWidth) FROM hits_10m_meshed_p WHERE SearchPhrase != '' GROUP BY SearchEngineID, ClientIP ORDER BY count(*) DESC LIMIT 10;
SELECT SearchEngineID, ClientIP, count(*) AS c, sum(Refresh), avg(ResolutionWidth) FROM hits_100m_meshed WHERE SearchPhrase != '' GROUP BY SearchEngineID, ClientIP ORDER BY count(*) DESC LIMIT 10;
-- сложная агрегация, для больших таблиц может не хватить оперативки.;
SELECT WatchID, ClientIP, count(*) AS c, sum(Refresh), avg(ResolutionWidth) FROM hits_10m_meshed_p WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY count(*) DESC LIMIT 10;
SELECT WatchID, ClientIP, count(*) AS c, sum(Refresh), avg(ResolutionWidth) FROM hits_100m_meshed WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY count(*) DESC LIMIT 10;
-- агрегация по двум полям, которая ничего не агрегирует. Для больших таблиц выполнить не получится.;
SELECT WatchID, ClientIP, count(*) AS c, sum(Refresh), avg(ResolutionWidth) FROM hits_10m_meshed_p GROUP BY WatchID, ClientIP ORDER BY count(*) DESC LIMIT 10;
SELECT WatchID, ClientIP, count(*) AS c, sum(Refresh), avg(ResolutionWidth) FROM hits_100m_meshed GROUP BY WatchID, ClientIP ORDER BY count(*) DESC LIMIT 10;
-- то же самое, но ещё и без фильтрации.;
SELECT URL, count(*) FROM hits_10m_meshed_p GROUP BY URL ORDER BY count(*) DESC LIMIT 10;
SELECT URL, count(*) FROM hits_100m_meshed GROUP BY URL ORDER BY count(*) DESC LIMIT 10;
-- агрегация по URL.;
SELECT 1, URL, count(*) FROM hits_10m_meshed_p GROUP BY 1, URL ORDER BY count(*) DESC LIMIT 10;
SELECT 1, URL, count(*) FROM hits_100m_meshed GROUP BY 1, URL ORDER BY count(*) DESC LIMIT 10;
-- агрегация по URL и числу.;
SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, count(*) FROM hits_10m_meshed_p GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY count(*) DESC LIMIT 10;
SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, count(*) FROM hits_100m_meshed GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY count(*) DESC LIMIT 10;
SELECT URL, count(*) AS PageViews FROM hits_10m_meshed_p WHERE CounterID = 34 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT DontCountHits AND NOT Refresh AND URL != '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10;
SELECT URL, count(*) AS PageViews FROM hits_100m_meshed WHERE CounterID = 34 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT DontCountHits AND NOT Refresh AND URL != '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10;
SELECT Title, count(*) AS PageViews FROM hits_10m_meshed_p WHERE CounterID = 34 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT DontCountHits AND NOT Refresh AND Title != '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10;
SELECT Title, count(*) AS PageViews FROM hits_100m_meshed WHERE CounterID = 34 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT DontCountHits AND NOT Refresh AND Title != '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10;
SELECT URL, count(*) AS PageViews FROM hits_10m_meshed_p WHERE CounterID = 34 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT Refresh AND IsLink AND NOT IsDownload GROUP BY URL ORDER BY PageViews DESC LIMIT 1000;
SELECT URL, count(*) AS PageViews FROM hits_100m_meshed WHERE CounterID = 34 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT Refresh AND IsLink AND NOT IsDownload GROUP BY URL ORDER BY PageViews DESC LIMIT 1000;
SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN SearchEngineID = 0 AND AdvEngineID = 0 THEN Referer ELSE '' END AS Src, URL AS Dst, count(*) AS PageViews FROM hits_10m_meshed_p WHERE CounterID = 34 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT Refresh GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1000;
SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN SearchEngineID = 0 AND AdvEngineID = 0 THEN Referer ELSE '' END AS Src, URL AS Dst, count(*) AS PageViews FROM hits_100m_meshed WHERE CounterID = 34 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT Refresh GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1000;
SELECT URLHash, EventDate, count(*) AS PageViews FROM hits_10m_meshed_p WHERE CounterID = 34 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT Refresh AND TraficSourceID IN (-1, 6) AND RefererHash = 6202628419148573758 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100000;
SELECT URLHash, EventDate, count(*) AS PageViews FROM hits_100m_meshed WHERE CounterID = 34 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT Refresh AND TraficSourceID IN (-1, 6) AND RefererHash = 6202628419148573758 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100000;
SELECT WindowClientWidth, WindowClientHeight, count(*) AS PageViews FROM hits_10m_meshed_p WHERE CounterID = 34 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT Refresh AND NOT DontCountHits AND URLHash = 6202628419148573758 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000;
SELECT WindowClientWidth, WindowClientHeight, count(*) AS PageViews FROM hits_100m_meshed WHERE CounterID = 34 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT Refresh AND NOT DontCountHits AND URLHash = 6202628419148573758 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000;
SELECT TIME_SLICE(EventTime, 1, 'MINUTE') AS Minute, count(*) AS PageViews FROM hits_10m_meshed_p WHERE CounterID = 34 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-02') AND NOT Refresh AND NOT DontCountHits GROUP BY Minute ORDER BY Minute;
SELECT TIME_SLICE(EventTime, 1, 'MINUTE') AS Minute, count(*) AS PageViews FROM hits_100m_meshed WHERE CounterID = 34 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-02') AND NOT Refresh AND NOT DontCountHits GROUP BY Minute ORDER BY Minute;