From 2751d1e7e1424eec158f79677bb00f100f4e7a3a Mon Sep 17 00:00:00 2001 From: "aleksei.bukhalov" Date: Fri, 19 Jun 2026 13:58:23 +0000 Subject: [PATCH] Add MariaDB with DuckDB storage engine (ha_duckdb) Results on c6a.4xlarge, branch bb-11.4-duckdb-jemalloc: - Sequential hot: 25.3s (DuckDB native: 26.3s) - Concurrent QPS: 1.198, error ratio: 0.000 (native: 0.705, 0.998) - Load time: 375.8s, data size: 46.9 GB --- mariadb-duckdb/benchmark.sh | 6 + mariadb-duckdb/check | 3 + mariadb-duckdb/create.sql | 109 ++++++++ mariadb-duckdb/data-size | 14 ++ mariadb-duckdb/install | 48 ++++ mariadb-duckdb/load | 34 +++ mariadb-duckdb/queries.sql | 43 ++++ mariadb-duckdb/query | 38 +++ .../results/20260619/c6a.4xlarge.json | 236 ++++++++++++++++++ mariadb-duckdb/start | 14 ++ mariadb-duckdb/stop | 2 + mariadb-duckdb/template.json | 8 + 12 files changed, 555 insertions(+) create mode 100755 mariadb-duckdb/benchmark.sh create mode 100755 mariadb-duckdb/check create mode 100644 mariadb-duckdb/create.sql create mode 100755 mariadb-duckdb/data-size create mode 100755 mariadb-duckdb/install create mode 100755 mariadb-duckdb/load create mode 100644 mariadb-duckdb/queries.sql create mode 100755 mariadb-duckdb/query create mode 100644 mariadb-duckdb/results/20260619/c6a.4xlarge.json create mode 100755 mariadb-duckdb/start create mode 100755 mariadb-duckdb/stop create mode 100644 mariadb-duckdb/template.json diff --git a/mariadb-duckdb/benchmark.sh b/mariadb-duckdb/benchmark.sh new file mode 100755 index 0000000000..aacd122b73 --- /dev/null +++ b/mariadb-duckdb/benchmark.sh @@ -0,0 +1,6 @@ +#!/bin/bash +# Thin shim — actual flow is in ../lib/benchmark-common.sh. 
+export BENCH_DOWNLOAD_SCRIPT="download-hits-tsv" +export BENCH_DURABLE=yes +export BENCH_RESTARTABLE=yes +exec ../lib/benchmark-common.sh diff --git a/mariadb-duckdb/check b/mariadb-duckdb/check new file mode 100755 index 0000000000..4cc5de1d09 --- /dev/null +++ b/mariadb-duckdb/check @@ -0,0 +1,3 @@ +#!/bin/bash +set -e +sudo mariadb -e "SELECT 1" >/dev/null diff --git a/mariadb-duckdb/create.sql b/mariadb-duckdb/create.sql new file mode 100644 index 0000000000..5ca83246fe --- /dev/null +++ b/mariadb-duckdb/create.sql @@ -0,0 +1,109 @@ +CREATE TABLE hits +( + WatchID BIGINT NOT NULL, + JavaEnable SMALLINT NOT NULL, + Title TEXT NOT NULL, + GoodEvent SMALLINT NOT NULL, + EventTime TIMESTAMP NOT NULL, + EventDate Date NOT NULL, + CounterID INTEGER NOT NULL, + ClientIP INTEGER NOT NULL, + RegionID INTEGER NOT NULL, + UserID BIGINT NOT NULL, + CounterClass SMALLINT NOT NULL, + OS SMALLINT NOT NULL, + UserAgent SMALLINT NOT NULL, + URL TEXT NOT NULL, + Referer TEXT NOT NULL, + IsRefresh SMALLINT NOT NULL, + RefererCategoryID SMALLINT NOT NULL, + RefererRegionID INTEGER NOT NULL, + URLCategoryID SMALLINT NOT NULL, + URLRegionID INTEGER NOT NULL, + ResolutionWidth SMALLINT NOT NULL, + ResolutionHeight SMALLINT NOT NULL, + ResolutionDepth SMALLINT NOT NULL, + FlashMajor SMALLINT NOT NULL, + FlashMinor SMALLINT NOT NULL, + FlashMinor2 TEXT NOT NULL, + NetMajor SMALLINT NOT NULL, + NetMinor SMALLINT NOT NULL, + UserAgentMajor SMALLINT NOT NULL, + UserAgentMinor VARCHAR(255) NOT NULL, + CookieEnable SMALLINT NOT NULL, + JavascriptEnable SMALLINT NOT NULL, + IsMobile SMALLINT NOT NULL, + MobilePhone SMALLINT NOT NULL, + MobilePhoneModel TEXT NOT NULL, + Params TEXT NOT NULL, + IPNetworkID INTEGER NOT NULL, + TraficSourceID SMALLINT NOT NULL, + SearchEngineID SMALLINT NOT NULL, + SearchPhrase TEXT NOT NULL, + AdvEngineID SMALLINT NOT NULL, + IsArtifical SMALLINT NOT NULL, + WindowClientWidth SMALLINT NOT NULL, + WindowClientHeight SMALLINT NOT NULL, + ClientTimeZone SMALLINT 
NOT NULL, + ClientEventTime TIMESTAMP NOT NULL, + SilverlightVersion1 SMALLINT NOT NULL, + SilverlightVersion2 SMALLINT NOT NULL, + SilverlightVersion3 INTEGER NOT NULL, + SilverlightVersion4 SMALLINT NOT NULL, + PageCharset TEXT NOT NULL, + CodeVersion INTEGER NOT NULL, + IsLink SMALLINT NOT NULL, + IsDownload SMALLINT NOT NULL, + IsNotBounce SMALLINT NOT NULL, + FUniqID BIGINT NOT NULL, + OriginalURL TEXT NOT NULL, + HID INTEGER NOT NULL, + IsOldCounter SMALLINT NOT NULL, + IsEvent SMALLINT NOT NULL, + IsParameter SMALLINT NOT NULL, + DontCountHits SMALLINT NOT NULL, + WithHash SMALLINT NOT NULL, + HitColor CHAR NOT NULL, + LocalEventTime TIMESTAMP NOT NULL, + Age SMALLINT NOT NULL, + Sex SMALLINT NOT NULL, + Income SMALLINT NOT NULL, + Interests SMALLINT NOT NULL, + Robotness SMALLINT NOT NULL, + RemoteIP INTEGER NOT NULL, + WindowName INTEGER NOT NULL, + OpenerName INTEGER NOT NULL, + HistoryLength SMALLINT NOT NULL, + BrowserLanguage TEXT NOT NULL, + BrowserCountry TEXT NOT NULL, + SocialNetwork TEXT NOT NULL, + SocialAction TEXT NOT NULL, + HTTPError SMALLINT NOT NULL, + SendTiming INTEGER NOT NULL, + DNSTiming INTEGER NOT NULL, + ConnectTiming INTEGER NOT NULL, + ResponseStartTiming INTEGER NOT NULL, + ResponseEndTiming INTEGER NOT NULL, + FetchTiming INTEGER NOT NULL, + SocialSourceNetworkID SMALLINT NOT NULL, + SocialSourcePage TEXT NOT NULL, + ParamPrice BIGINT NOT NULL, + ParamOrderID TEXT NOT NULL, + ParamCurrency TEXT NOT NULL, + ParamCurrencyID SMALLINT NOT NULL, + OpenstatServiceName TEXT NOT NULL, + OpenstatCampaignID TEXT NOT NULL, + OpenstatAdID TEXT NOT NULL, + OpenstatSourceID TEXT NOT NULL, + UTMSource TEXT NOT NULL, + UTMMedium TEXT NOT NULL, + UTMCampaign TEXT NOT NULL, + UTMContent TEXT NOT NULL, + UTMTerm TEXT NOT NULL, + FromTag TEXT NOT NULL, + HasGCLID SMALLINT NOT NULL, + RefererHash BIGINT NOT NULL, + URLHash BIGINT NOT NULL, + CLID INTEGER NOT NULL, + PRIMARY KEY (CounterID, EventDate, UserID, EventTime, WatchID) +) ENGINE=DuckDB 
DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin;
diff --git a/mariadb-duckdb/data-size b/mariadb-duckdb/data-size
new file mode 100755
index 0000000000..6131b19e9e
--- /dev/null
+++ b/mariadb-duckdb/data-size
@@ -0,0 +1,14 @@
+#!/bin/bash
+set -eu
+
+# DuckDB engine stores all data in a single duckdb.db file in the MariaDB data
+# directory. information_schema.TABLES does not report DuckDB storage size, so
+# we measure the file directly.
+DUCKDB_FILE="/var/lib/mysql/duckdb.db"
+
+if [ ! -f "$DUCKDB_FILE" ]; then
+    echo "0"
+    exit 0
+fi
+
+du -sb "$DUCKDB_FILE" | awk '{print $1}'
diff --git a/mariadb-duckdb/install b/mariadb-duckdb/install
new file mode 100755
index 0000000000..8baf9c88f5
--- /dev/null
+++ b/mariadb-duckdb/install
@@ -0,0 +1,48 @@
+#!/bin/bash
+set -eu
+
+CI_SOURCES_URL="https://ci.mariadb.org/68929/amd64-ubuntu-2404-deb-autobake/mariadb.sources"
+
+# Idempotent: if DuckDB plugin is already active, nothing to do (grep -x: 'INACTIVE' must not match).
+if sudo mariadb -e "SELECT PLUGIN_STATUS FROM information_schema.PLUGINS WHERE PLUGIN_NAME='DuckDB';" 2>/dev/null | grep -qx 'ACTIVE'; then
+    echo "MariaDB with DuckDB engine already installed and active."
+    exit 0
+fi
+
+# Remove any conflicting MariaDB/MySQL installation to avoid package conflicts.
+# On a fresh EC2 instance Ubuntu 24.04 may ship with mariadb stubs.
+if dpkg -l | grep -qE '^ii\s+(mariadb-server|mysql-server)'; then
+    echo "Removing existing MariaDB/MySQL installation..."
+    sudo systemctl stop mariadb mysql 2>/dev/null || true
+    sudo apt-get purge -y 'mariadb-server*' 'mariadb-client*' 'mariadb-common*' \
+        'mysql-server*' 'mysql-client*' 'libmariadb*' 'libmysqlclient*' 2>/dev/null || true
+    sudo rm -rf /var/lib/mysql /etc/mysql
+fi
+
+# Add MariaDB CI build as apt source (Trusted: yes — unsigned CI packages, testing only).
+sudo wget -q -O /etc/apt/sources.list.d/mariadb-duckdb-ci.sources "$CI_SOURCES_URL"
+
+sudo apt-get update -y
+sudo DEBIAN_FRONTEND=noninteractive apt-get install -y \
+    mariadb-server mariadb-client
+
+# Configure the DuckDB storage engine plugin (from mariadb.org article defaults).
+sudo tee /etc/mysql/mariadb.conf.d/duckdb.cnf >/dev/null <<'EOF'
+[mysqld]
+plugin-maturity=alpha
+plugin-load-add=ha_duckdb.so
+duckdb-memory-limit=24G
+EOF
+
+sudo systemctl enable mariadb
+sudo systemctl restart mariadb
+
+# Smoke-test: DuckDB plugin must be exactly ACTIVE (whole-line match, so INACTIVE fails).
+sudo mariadb -e "SELECT PLUGIN_STATUS FROM information_schema.PLUGINS WHERE PLUGIN_NAME='DuckDB';" \
+    | grep -qx 'ACTIVE' || { echo "ERROR: DuckDB plugin not active after restart" >&2; exit 1; }
+
+# Allow the mysql user (which runs DuckDB embedded) to traverse the home
+# directory so COPY FROM can read dataset files anywhere under ~.
+sudo chmod o+x "${HOME:-/home/ubuntu}"
+
+echo "MariaDB with DuckDB engine installed and ready."
diff --git a/mariadb-duckdb/load b/mariadb-duckdb/load
new file mode 100755
index 0000000000..bcda9c407e
--- /dev/null
+++ b/mariadb-duckdb/load
@@ -0,0 +1,34 @@
+#!/bin/bash
+set -eu
+
+# Helper: run a DuckDB SQL statement through MariaDB's run_in_duckdb UDF. Backslashes and single
+# quotes in $1 are doubled so MariaDB's string-literal parser hands the text to DuckDB verbatim.
+duck() {
+    local esc
+    esc=$(printf '%s' "$1" | sed -e 's/\\/\\\\/g' -e "s/'/''/g")
+    sudo mariadb -N -e "SELECT run_in_duckdb('$esc')"
+}
+
+sudo mariadb -e "DROP DATABASE IF EXISTS test"
+sudo mariadb -e "CREATE DATABASE test"
+sudo mariadb test < create.sql
+
+# Load via DuckDB's native vectorized CSV reader (12x faster than LOAD DATA LOCAL
+# INFILE which routes every row through MariaDB's single-threaded write_row path).
+# hits.tsv is tab-delimited with datetime strings in TIMESTAMP columns, matching
+# the table schema — no type conversion needed.
+#
+# DuckDB glob expander requires read permission on all parent directories.
+# The home directory is o+x only, so DuckDB can traverse but not list it,
+# causing "No files found" errors. Hardlink to /var/lib/mysql/ (mysql-owned,
+# mode 755) resolves this instantly without copying 70 GB. -f replaces a stale link from an aborted run.
+HITS_LINK=/var/lib/mysql/hits_load.tsv
+sudo ln -f "$(realpath hits.tsv)" "$HITS_LINK"
+sudo chmod 644 "$HITS_LINK"
+# NULLSTR '\N' matches MySQL/MariaDB convention: only \N is NULL, empty fields → ''.
+# Without this, DuckDB treats every empty TSV field as NULL → NOT NULL violations.
+duck "COPY test.hits FROM '$HITS_LINK' (FORMAT CSV, DELIMITER '\t', NULLSTR '\N')"
+
+sudo rm -f "$HITS_LINK"
+rm -f hits.tsv
+sync
diff --git a/mariadb-duckdb/queries.sql b/mariadb-duckdb/queries.sql
new file mode 100644
index 0000000000..bdfc87c654
--- /dev/null
+++ b/mariadb-duckdb/queries.sql
@@ -0,0 +1,43 @@
+SELECT COUNT(*) FROM hits;
+SELECT COUNT(*) FROM hits WHERE AdvEngineID <> 0;
+SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits;
+SELECT AVG(UserID) FROM hits;
+SELECT COUNT(DISTINCT UserID) FROM hits;
+SELECT COUNT(DISTINCT SearchPhrase) FROM hits;
+SELECT MIN(EventDate), MAX(EventDate) FROM hits;
+SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC;
+SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10;
+SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10;
+SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
+SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
+SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
+SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE
SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; +SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10; +SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; +SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10; +SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; +SELECT UserID FROM hits WHERE UserID = 435090932899640449; +SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%'; +SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10; +SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10; +SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10; +SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10; +SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; +SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; +SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 
4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), 
SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits; +SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; +SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; +SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; +SELECT URL, COUNT(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10; +SELECT 1, URL, COUNT(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10; +SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10; +SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10; +SELECT Title, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10; +SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; +SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM 
hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000;
+SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 10 OFFSET 100;
+SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 10000;
+SELECT CONCAT(YEAR(EventTime),'-',LPAD(CAST(MONTH(EventTime) AS CHAR),2,'0'),'-',LPAD(CAST(DAY(EventTime) AS CHAR),2,'0'),' ',LPAD(CAST(HOUR(EventTime) AS CHAR),2,'0'),':',LPAD(CAST(MINUTE(EventTime) AS CHAR),2,'0'),':00') AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY M ORDER BY M LIMIT 10 OFFSET 1000;
diff --git a/mariadb-duckdb/query b/mariadb-duckdb/query
new file mode 100755
index 0000000000..eeb841fd17
--- /dev/null
+++ b/mariadb-duckdb/query
@@ -0,0 +1,38 @@
+#!/bin/bash
+# Reads a SQL query from stdin, runs it via `mariadb -vvv` against the `test` DB.
+# Stdout: query result.
+# Stderr: query runtime in fractional seconds on the last line (parsed from
+# mariadb's "N rows in set (X.XX sec)" footer; days/hours/min/sec all handled).
+# Exit non-zero on error.
+set -e
+
+query=$(cat)
+
+out=$(sudo mariadb test -vvv -e "$query" 2>&1) && exit_code=0 || exit_code=$?
+
+if [ "$exit_code" -ne 0 ] || printf '%s\n' "$out" | grep -q '^ERROR'; then
+    printf '%s\n' "$out" >&2
+    exit 1
+fi
+
+printf '%s\n' "$out"
+
+# mariadb may print "(2 days 3 hours 4 min 5.6 sec)" or any subset; sed -n/p prints nothing if the footer does not parse.
+parsed=$(printf '%s\n' "$out" \
+    | grep -P 'rows? in set|Empty set|Query OK' \
+    | tail -n1 \
+    | sed -rn 's/^.*\((([0-9.]+) days? )?(([0-9.]+) hours? )?(([0-9.]+) min )?([0-9.]+) sec\).*$/\2,\4,\6,\7/p')
+
+if [ -z "$parsed" ]; then
+    echo "no timing in mariadb output" >&2
+    exit 1
+fi
+
+awk -F, -v p="$parsed" 'BEGIN {
+    split(p, a, ",")
+    d = (a[1] == "") ? 0 : a[1]
+    h = (a[2] == "") ? 0 : a[2]
+    m = (a[3] == "") ? 0 : a[3]
+    s = (a[4] == "") ? 0 : a[4]
+    printf "%.3f\n", d * 86400 + h * 3600 + m * 60 + s
+}' >&2
diff --git a/mariadb-duckdb/results/20260619/c6a.4xlarge.json b/mariadb-duckdb/results/20260619/c6a.4xlarge.json
new file mode 100644
index 0000000000..80ae5c77a6
--- /dev/null
+++ b/mariadb-duckdb/results/20260619/c6a.4xlarge.json
@@ -0,0 +1,236 @@
+{
+  "system": "MariaDB with DuckDB storage engine",
+  "date": "2026-06-19",
+  "machine": "c6a.4xlarge",
+  "cluster_size": 1,
+  "proprietary": "no",
+  "hardware": "cpu",
+  "tuned": "no",
+  "tags": [
+    "C++",
+    "column-oriented",
+    "MySQL compatible",
+    "embedded DuckDB"
+  ],
+  "load_time": 375.776,
+  "data_size": 46980411392,
+  "concurrent_qps": 1.198,
+  "concurrent_error_ratio": 0.0,
+  "result": [
+    [
+      0.042,
+      0.002,
+      0.002
+    ],
+    [
+      0.177,
+      0.007,
+      0.006
+    ],
+    [
+      0.621,
+      0.023,
+      0.023
+    ],
+    [
+      0.418,
+      0.035,
+      0.035
+    ],
+    [
+      0.582,
+      0.307,
+      0.305
+    ],
+    [
+      1.226,
+      0.438,
+      0.436
+    ],
+    [
+      0.11,
+      0.003,
+      0.003
+    ],
+    [
+      0.297,
+      0.009,
+      0.009
+    ],
+    [
+      1.552,
+      0.391,
+      0.39
+    ],
+    [
+      2.757,
+      0.54,
+      0.538
+    ],
+    [
+      1.3,
+      0.132,
+      0.129
+    ],
+    [
+      1.542,
+      0.144,
+      0.141
+    ],
+    [
+      1.726,
+      0.408,
+      0.405
+    ],
+    [
+      2.837,
+      0.727,
+      0.739
+    ],
+    [
+      1.747,
+      0.458,
+      0.463
+    ],
+    [
+      0.594,
+      0.355,
+      0.352
+    ],
+    [
+      2.758,
+      0.876,
+      0.9
+    ],
+    [
+      2.522,
0.658, + 0.678 + ], + [ + 5.596, + 3.007, + 2.983 + ], + [ + 0.645, + 0.006, + 0.006 + ], + [ + 17.868, + 0.492, + 0.487 + ], + [ + 19.477, + 0.495, + 0.492 + ], + [ + 25.421, + 1.68, + 0.651 + ], + [ + 0.689, + 0.077, + 0.079 + ], + [ + 0.467, + 0.028, + 0.028 + ], + [ + 1.2, + 0.174, + 0.172 + ], + [ + 0.31, + 0.033, + 0.027 + ], + [ + 18.033, + 0.375, + 0.375 + ], + [ + 13.007, + 7.588, + 7.57 + ], + [ + 0.285, + 0.029, + 0.029 + ], + [ + 4.447, + 0.356, + 0.358 + ], + [ + 7.365, + 0.435, + 0.438 + ], + [ + 6.273, + 1.693, + 1.743 + ], + [ + 18.173, + 1.695, + 1.697 + ], + [ + 18.135, + 1.833, + 1.84 + ], + [ + 0.654, + 0.46, + 0.461 + ], + [ + 0.152, + 0.035, + 0.03 + ], + [ + 0.11, + 0.01, + 0.01 + ], + [ + 0.131, + 0.011, + 0.011 + ], + [ + 0.216, + 0.06, + 0.063 + ], + [ + 0.138, + 0.008, + 0.007 + ], + [ + 0.141, + 0.007, + 0.007 + ], + [ + 0.235, + 0.15, + 0.151 + ] + ] +} diff --git a/mariadb-duckdb/start b/mariadb-duckdb/start new file mode 100755 index 0000000000..ddbe84ca78 --- /dev/null +++ b/mariadb-duckdb/start @@ -0,0 +1,14 @@ +#!/bin/bash +set -eu + +if sudo mariadb -e "SELECT 1" >/dev/null 2>&1; then + exit 0 +fi +sudo systemctl start mariadb +# Wait up to 30s for the server to accept connections. 
+for _ in $(seq 1 30); do + sudo mariadb -e "SELECT 1" >/dev/null 2>&1 && exit 0 + sleep 1 +done +echo "ERROR: mariadb did not start within 30s" >&2 +exit 1 diff --git a/mariadb-duckdb/stop b/mariadb-duckdb/stop new file mode 100755 index 0000000000..1b523d374d --- /dev/null +++ b/mariadb-duckdb/stop @@ -0,0 +1,2 @@ +#!/bin/bash +sudo systemctl stop mariadb || true diff --git a/mariadb-duckdb/template.json b/mariadb-duckdb/template.json new file mode 100644 index 0000000000..3f3d6c0d0b --- /dev/null +++ b/mariadb-duckdb/template.json @@ -0,0 +1,8 @@ +{ + "system": "MariaDB with DuckDB storage engine", + "cluster_size": 1, + "proprietary": "no", + "hardware": "cpu", + "tuned": "no", + "tags": ["C++", "column-oriented", "MySQL compatible", "embedded DuckDB"] +}