From f1ace81fc63800552ad9dffcc2eaf92f1cc86768 Mon Sep 17 00:00:00 2001 From: Marco Bambini Date: Fri, 29 May 2026 11:58:15 +0200 Subject: [PATCH 1/2] Add chunked payload transport --- API.md | 173 +++- CHANGELOG.md | 13 + PERFORMANCE.md | 17 +- README.md | 1 + docker/postgresql/Dockerfile | 1 + docker/postgresql/Dockerfile.debug | 4 +- .../Dockerfile.debug-no-optimization | 4 +- src/cloudsync.c | 796 +++++++++++++++++- src/cloudsync.h | 32 +- src/dbutils.c | 8 +- src/dbutils.h | 1 + src/network/network.c | 310 ++++--- src/pk.c | 50 ++ src/pk.h | 4 + src/postgresql/cloudsync.sql.in | 17 + src/postgresql/cloudsync_postgresql.c | 343 +++++++- src/postgresql/sql_postgresql.c | 36 + src/sql.h | 6 + src/sqlite/cloudsync_sqlite.c | 343 ++++++++ src/sqlite/sql_sqlite.c | 31 + test/postgresql/39_payload_chunks.sql | 201 +++++ test/postgresql/full_test.sql | 1 + test/unit.c | 232 +++++ 23 files changed, 2498 insertions(+), 126 deletions(-) create mode 100644 test/postgresql/39_payload_chunks.sql diff --git a/API.md b/API.md index 8e6e825..ec513f5 100644 --- a/API.md +++ b/API.md @@ -1,11 +1,12 @@ # API Reference -This document provides a reference for the SQLite functions provided by the `sqlite-sync` extension. +This document provides a reference for the SQL functions provided by the `sqlite-sync` extension. Unless noted otherwise, the APIs are available on both SQLite and PostgreSQL builds. 
## Index - [Configuration Functions](#configuration-functions) - [`cloudsync_init()`](#cloudsync_inittable_name-crdt_algo-init_flags) + - [`cloudsync_set()`](#cloudsync_setkey-value) - [`cloudsync_enable()`](#cloudsync_enabletable_name) - [`cloudsync_disable()`](#cloudsync_disabletable_name) - [`cloudsync_is_enabled()`](#cloudsync_is_enabledtable_name) @@ -24,6 +25,10 @@ This document provides a reference for the SQLite functions provided by the `sql - [Schema Alteration Functions](#schema-alteration-functions) - [`cloudsync_begin_alter()`](#cloudsync_begin_altertable_name) - [`cloudsync_commit_alter()`](#cloudsync_commit_altertable_name) +- [Payload Functions](#payload-functions) + - [`cloudsync_payload_encode()`](#cloudsync_payload_encodetbl-pk-col_name-col_value-col_version-db_version-site_id-cl-seq) + - [`cloudsync_payload_chunks()`](#cloudsync_payload_chunkssince_db_version-filter_site_id-until_db_version) + - [`cloudsync_payload_apply()`](#cloudsync_payload_applypayload) - [Network Functions](#network-functions) - [`cloudsync_network_init()`](#cloudsync_network_initmanageddatabaseid) - [`cloudsync_network_cleanup()`](#cloudsync_network_cleanup) @@ -40,6 +45,37 @@ This document provides a reference for the SQLite functions provided by the `sql ## Configuration Functions +### `cloudsync_set(key, value)` + +**Description:** Stores a global CloudSync setting in the current database. Settings persist across database reopens and are loaded automatically by the extension. + +The following payload setting is supported: + +| Key | Description | Default | Minimum | +|---|---|---:|---:| +| `payload_max_chunk_size` | Maximum transport payload size generated by [`cloudsync_payload_chunks()`](#cloudsync_payload_chunkssince_db_version-filter_site_id-until_db_version). Values below the minimum are clamped. | `5242880` (5 MB) | `262144` (256 KB) | + +`payload_max_chunk_size` affects only chunk generation. 
[`cloudsync_payload_apply()`](#cloudsync_payload_applypayload) continues to accept legacy payloads, monolithic payloads, and v3 chunk-fragment payloads even when they are larger than the local setting. This preserves compatibility between peers using different settings. + +**Parameters:** + +- `key` (TEXT): The setting key. +- `value` (TEXT): The setting value. For `payload_max_chunk_size`, pass the value in bytes. + +**Returns:** SQLite returns no value. PostgreSQL returns `true` on success. + +**Example:** + +```sql +-- Use 1 MB transport chunks +SELECT cloudsync_set('payload_max_chunk_size', '1048576'); + +-- Restore the default 5 MB transport chunks +SELECT cloudsync_set('payload_max_chunk_size', '5242880'); +``` + +--- + ### `cloudsync_init(table_name, [crdt_algo], [init_flags])` **Description:** Initializes a table for `sqlite-sync` synchronization. This function is idempotent and needs to be called only once per table on each site; configurations are stored in the database and automatically loaded with the extension. @@ -409,6 +445,137 @@ SELECT cloudsync_commit_alter('my_table'); --- +## Payload Functions + +### `cloudsync_payload_encode(tbl, pk, col_name, col_value, col_version, db_version, site_id, cl, seq)` + +**Description:** Encodes rows from `cloudsync_changes` into a single monolithic payload. This is the legacy payload API and remains fully supported for backward compatibility. + +Use this API when the expected payload size is modest or when you need to interoperate with callers that expect a single BLOB. For large rowsets or large individual BLOB/TEXT values, prefer [`cloudsync_payload_chunks()`](#cloudsync_payload_chunkssince_db_version-filter_site_id-until_db_version), which splits transport payloads according to `payload_max_chunk_size`. + +**Parameters:** The function is an aggregate over the columns returned by `cloudsync_changes`: + +- `tbl` (TEXT): Source table name. +- `pk` (BLOB): Encoded primary key. 
+- `col_name` (TEXT): Changed column name. +- `col_value` (BLOB): Encoded column value. +- `col_version` (INTEGER/BIGINT): Column version. +- `db_version` (INTEGER/BIGINT): Source database version. +- `site_id` (BLOB): Source site identifier. +- `cl` (INTEGER/BIGINT): Causal length. +- `seq` (INTEGER/BIGINT): Sequence number within the source database version. + +**Returns:** A single payload BLOB. + +**Example:** + +```sql +SELECT cloudsync_payload_encode( + tbl, pk, col_name, col_value, col_version, db_version, site_id, cl, seq +) AS payload +FROM cloudsync_changes; +``` + +--- + +### `cloudsync_payload_chunks([since_db_version], [filter_site_id], [until_db_version])` + +**Description:** Generates sync payloads as a stream of transport-sized chunks. It is the chunk-aware evolution of [`cloudsync_payload_encode()`](#cloudsync_payload_encodetbl-pk-col_name-col_value-col_version-db_version-site_id-cl-seq), designed for large rowsets and for single BLOB/TEXT values that are larger than the configured chunk size. + +The maximum generated chunk size is controlled by the global `payload_max_chunk_size` setting. The default is 5 MB and the technical minimum is 256 KB: + +```sql +SELECT cloudsync_set('payload_max_chunk_size', '5242880'); +``` + +When a single encoded column value does not fit in one chunk, CloudSync transparently emits v3 payload fragments for that value. The receiver stages fragments internally and applies the value when all parts arrive. Fragments can arrive out of order; incomplete stale fragment groups are cleaned up automatically. + +`cloudsync_payload_chunks()` does not change the apply contract: [`cloudsync_payload_apply()`](#cloudsync_payload_applypayload) accepts legacy payloads, monolithic payloads, and v3 chunk-fragment payloads. The local `payload_max_chunk_size` setting is not used to reject incoming payloads. + +**Important memory note:** chunking limits the size of each transport payload that CloudSync generates. 
It does not remove the database engine's need to materialize a single final cell value when applying a very large BLOB/TEXT column. In other words, a 500 MB BLOB can be transported in smaller chunks, but the receiving database must still be able to store and bind the completed 500 MB value when that row is applied. + +**Parameters:** + +- `since_db_version` (INTEGER/BIGINT, optional): Start after this source database version. If omitted, CloudSync uses the stored send checkpoint. +- `filter_site_id` (BLOB, optional): Site ID whose changes should be encoded. If omitted, CloudSync uses the local site ID. +- `until_db_version` (INTEGER/BIGINT, optional): Upper watermark to include. If omitted or `0`, CloudSync captures the current maximum source database version before streaming chunks. + +**Returns:** A rowset with one row per chunk: + +| Column | Description | +|---|---| +| `payload` | Payload BLOB to pass to `cloudsync_payload_apply()`. | +| `chunk_index` | Zero-based chunk index for this stream. | +| `payload_size` | Payload size in bytes. | +| `rows` | Number of encoded payload rows in this chunk. Fragment chunks usually contain one fragment row. | +| `db_version_min` | Minimum source `db_version` represented by this chunk. | +| `db_version_max` | Maximum source `db_version` represented by this chunk. | +| `watermark_db_version` | Stable upper watermark captured for this chunk stream. Store this after all chunks are durably transferred/applied. 
| + +**SQLite usage:** `cloudsync_payload_chunks` is exposed as a virtual table with hidden constraint columns: + +```sql +-- Default: uses the stored send checkpoint and local site id +SELECT payload, chunk_index, payload_size, watermark_db_version +FROM cloudsync_payload_chunks +ORDER BY chunk_index; + +-- Explicit arguments through hidden columns +SELECT payload, chunk_index, payload_size, watermark_db_version +FROM cloudsync_payload_chunks +WHERE since_db_version = 100 + AND site_id = cloudsync_siteid() + AND until_db_version = 200 +ORDER BY chunk_index; +``` + +**PostgreSQL usage:** `cloudsync_payload_chunks` is exposed as a set-returning function with three optional arguments: + +```sql +-- Default: uses the stored send checkpoint and local site id +SELECT * +FROM cloudsync_payload_chunks(); + +-- Explicit arguments +SELECT * +FROM cloudsync_payload_chunks(100, cloudsync_siteid(), 200); +``` + +**Apply example:** + +```sql +-- Apply chunks on a receiving peer. Chunks may be applied one at a time. +SELECT cloudsync_payload_apply(?); +``` + +On PostgreSQL, apply chunks as individual statements from the transport/client layer. Do not use a set-based statement such as `SELECT cloudsync_payload_apply(payload) FROM chunks_table;` while reading payloads from a table in the same database session. `cloudsync_payload_apply()` performs writes through SPI, and applying while the same statement is still scanning a payload table can conflict with PostgreSQL executor resource ownership. Fetch each payload into the client (or into a local procedural variable after the read completes) and then call `cloudsync_payload_apply()` for that single payload. + +--- + +### `cloudsync_payload_apply(payload)` + +**Description:** Applies a sync payload to the current database. The function accepts all supported payload formats: + +- Legacy payloads generated by older SQLite Sync versions. 
+- Monolithic payloads generated by [`cloudsync_payload_encode()`](#cloudsync_payload_encodetbl-pk-col_name-col_value-col_version-db_version-site_id-cl-seq). +- Chunk-fragment payloads generated by [`cloudsync_payload_chunks()`](#cloudsync_payload_chunkssince_db_version-filter_site_id-until_db_version). + +When a v3 fragment payload is received, CloudSync stores the fragment in an internal table and returns after applying zero or more completed values. Once the final fragment for a value is received, the completed value is validated and applied. Duplicate fragment delivery is idempotent. + +**Parameters:** + +- `payload` (BLOB/BYTEA): Payload BLOB to apply. + +**Returns:** Number of payload rows applied. Fragment payloads that are staged but not yet complete can return `0`. + +**Example:** + +```sql +SELECT cloudsync_payload_apply(:payload); +``` + +--- + ## Network Functions ### `cloudsync_network_init(managedDatabaseId)` @@ -500,6 +667,10 @@ This means: if you get JSON back, the server was reachable and the network proto **Description:** Sends all unsent local changes to the remote server. +The send path streams payloads through [`cloudsync_payload_chunks()`](#cloudsync_payload_chunkssince_db_version-filter_site_id-until_db_version), so `payload_max_chunk_size` also limits the payloads generated for network transport. Each generated chunk is uploaded/applied independently; the local send checkpoint is advanced only after the chunk stream completes successfully. + +Chunk transport is transparent to the CloudSync backend. Each chunk is sent as a normal `/apply` payload, either inline as a base64 `blob` or through the upload `url` path. There is no separate chunk flag: old payloads, monolithic payloads, and v3 fragment payloads are distinguished by the payload format itself. + **Parameters:** None. 
**Returns:** A JSON string with the send result: diff --git a/CHANGELOG.md b/CHANGELOG.md index 158a6ff..84e0b6b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,19 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). +## [Unreleased] + +### Added + +- **Chunked payload generation** via `cloudsync_payload_chunks()`, available as a SQLite virtual table and as a PostgreSQL set-returning function. The API emits transport-sized payload chunks and transparently fragments oversized BLOB/TEXT values into v3 fragment payloads. +- **`payload_max_chunk_size` global setting** for controlling generated chunk size. The default is 5 MB and values below the 256 KB technical minimum are clamped. +- **Payload chunking documentation** in `API.md` and `PERFORMANCE.md`, including the explicit memory note that chunking bounds transport payloads but the database must still materialize a completed single BLOB/TEXT value when it is applied. + +### Changed + +- `cloudsync_payload_apply()` now accepts legacy payloads, monolithic payloads, and v3 fragment payloads without enforcing the local `payload_max_chunk_size`, preserving compatibility between peers with different settings. +- `cloudsync_network_send_changes()` now streams outgoing changes through `cloudsync_payload_chunks()` instead of first building one monolithic payload. This bounds transport payload size for the built-in network path and lets large rowsets or oversized BLOB/TEXT values flow through the same `/apply` endpoint as regular payloads. + ## [1.0.20] - 2026-05-26 ### Changed diff --git a/PERFORMANCE.md b/PERFORMANCE.md index 236ab95..160a28f 100644 --- a/PERFORMANCE.md +++ b/PERFORMANCE.md @@ -41,7 +41,7 @@ SELECT ... 
FROM cloudsync_changes WHERE db_version > Each metadata table has an **index on `db_version`**, so payload generation scales primarily with the number of new changes, plus a small per-synced-table overhead to construct the `cloudsync_changes` query. It does not diff the full dataset. In SQLite, each changed column also performs a primary-key lookup in the base table to retrieve the current value. -The resulting payload is LZ4-compressed before transmission. +The legacy `cloudsync_payload_encode()` API builds one monolithic LZ4-compressed payload before transmission. For large deltas, `cloudsync_payload_chunks()` can be used instead: it streams a sequence of payload chunks bounded by the `payload_max_chunk_size` setting (default 5 MB, minimum 256 KB). If a single encoded BLOB/TEXT value is larger than the chunk budget, the value is split into transparent v3 fragments and reassembled by `cloudsync_payload_apply()` on the receiver. #### Pull: Payload Application @@ -69,7 +69,7 @@ When the application runs sync off the main thread, perceived latency depends on - **Sync interval**: How often the app triggers a push/pull cycle. More frequent syncs mean smaller deltas (smaller D) and faster individual sync operations, at the cost of more network round-trips. - **Network latency**: The round-trip time to the sync server. LZ4 compression reduces payload size, but latency is dominated by the network hop itself for small deltas. -- **Payload size**: Proportional to D x average column value size. Large BLOBs or TEXT values will increase transfer time linearly. +- **Payload size**: Proportional to D x average column value size. Large BLOBs or TEXT values will increase transfer time linearly. Use `cloudsync_payload_chunks()` when transport payloads may be large; it limits each generated transport payload but does not change the size of the final database value. The extension does not impose a sync schedule -- the application controls when and how often to sync. 
A typical pattern is to sync on a timer (e.g., every 5-30 seconds) or on specific events (app foreground, user action). @@ -118,7 +118,11 @@ Normal application reads are not directly instrumented by the extension. No trig When a new device syncs for the first time (`db_version = 0`), the push payload contains the **entire dataset**: every column of every row across all synced tables. The payload size is proportional to `N * C` (total rows times columns). -The payload is built entirely in memory, starting with a 512 KB buffer (`CLOUDSYNC_PAYLOAD_MINBUF_SIZE` in `src/cloudsync.c`) and growing via `realloc` as needed. Peak memory usage is at least the full uncompressed payload size and can be higher during compression. For a database with 1 million rows and 10 columns of average 50 bytes each, the uncompressed payload could reach ~500 MB before LZ4 compression. +With the legacy `cloudsync_payload_encode()` API, the payload is built entirely in memory, starting with a 512 KB buffer (`CLOUDSYNC_PAYLOAD_MINBUF_SIZE` in `src/cloudsync.c`) and growing via `realloc` as needed. Peak memory usage is at least the full uncompressed payload size and can be higher during compression. For a database with 1 million rows and 10 columns of average 50 bytes each, the uncompressed payload could reach ~500 MB before LZ4 compression. + +For large initial syncs, prefer `cloudsync_payload_chunks()`. It keeps each generated transport payload bounded by `payload_max_chunk_size` and can fragment a single oversized BLOB/TEXT column across multiple v3 fragment payloads. This prevents the transport payload itself from growing without bound and avoids constructing a monolithic v2 payload during v3 apply. + +Important limitation: chunking does **not** make a single database cell streamable all the way into the storage engine. 
When the last fragment of a very large BLOB/TEXT value arrives, the receiver must still materialize the completed value once in order to bind/store it in the destination database. Size `payload_max_chunk_size` for transport safety, but size application memory limits for the largest individual value you allow. Subsequent syncs are incremental (proportional to D, changes since the last sync), so the first sync is the expensive one. Applications with large datasets should plan for this -- for example, by seeding new devices from a database snapshot rather than syncing from scratch. @@ -185,6 +189,7 @@ CloudSync: sync_time ~ O(D) -- grows with changes since last sy 2. **`db_version` index**: Enables efficient range scans for delta extraction. 3. **Deferred batch merge**: Column changes for the same primary key are accumulated and flushed as a single SQL statement. 4. **Prepared statement caching**: Merge statements are compiled once and reused across rows. -5. **LZ4 compression**: Reduces payload size for network transfer. -6. **Per-column tracking**: Only changed columns are included in the sync payload, not entire rows. -7. **Early exit on stale data**: The CLS algorithm skips rows where the incoming causal length is lower than the local one, avoiding unnecessary column-level comparisons. +5. **Chunked payload generation**: `cloudsync_payload_chunks()` bounds transport payload size and handles oversized single values with transparent v3 fragments. +6. **LZ4 compression**: Reduces payload size for network transfer. +7. **Per-column tracking**: Only changed columns are included in the sync payload, not entire rows. +8. **Early exit on stale data**: The CLS algorithm skips rows where the incoming causal length is lower than the local one, avoiding unnecessary column-level comparisons. 
diff --git a/README.md b/README.md index 6bdaa30..87ad410 100644 --- a/README.md +++ b/README.md @@ -219,6 +219,7 @@ See the full guide: **[Row-Level Security Documentation](./docs/row-level-securi ## Documentation - **[API Reference](./API.md)**: all functions, parameters, and examples +- **[Performance & Overhead](./PERFORMANCE.md)**: sync cost model, payload chunking, and large-value memory notes - **[Installation Guide](./docs/installation.md)**: platform-specific setup (Swift, Android, Expo, React Native, Flutter, WASM) - **[Block-Level LWW Guide](./docs/block-lww.md)**: line-level text merge for markdown and documents - **[Row-Level Security Guide](./docs/row-level-security.md)**: multi-tenant access control with server-enforced policies diff --git a/docker/postgresql/Dockerfile b/docker/postgresql/Dockerfile index b86e6dc..e18b71b 100644 --- a/docker/postgresql/Dockerfile +++ b/docker/postgresql/Dockerfile @@ -6,6 +6,7 @@ FROM postgres:${POSTGRES_TAG} # and install the matching server-dev package RUN apt-get update && apt-get install -y \ build-essential \ + postgresql-contrib-${PG_MAJOR} \ postgresql-server-dev-${PG_MAJOR} \ git \ make \ diff --git a/docker/postgresql/Dockerfile.debug b/docker/postgresql/Dockerfile.debug index 3f77c04..c554936 100644 --- a/docker/postgresql/Dockerfile.debug +++ b/docker/postgresql/Dockerfile.debug @@ -44,7 +44,9 @@ RUN set -eux; \ cd /usr/src/postgresql-17; \ ./configure --enable-debug --enable-cassert --without-icu CFLAGS="-O0 -g3 -fno-omit-frame-pointer"; \ make -j"$(nproc)"; \ - make install + make install; \ + make -C contrib/dblink -j"$(nproc)"; \ + make -C contrib/dblink install ENV PATH="/usr/local/pgsql/bin:${PATH}" ENV LD_LIBRARY_PATH="/usr/local/pgsql/lib:${LD_LIBRARY_PATH}" diff --git a/docker/postgresql/Dockerfile.debug-no-optimization b/docker/postgresql/Dockerfile.debug-no-optimization index 3f77c04..c554936 100644 --- a/docker/postgresql/Dockerfile.debug-no-optimization +++ 
b/docker/postgresql/Dockerfile.debug-no-optimization @@ -44,7 +44,9 @@ RUN set -eux; \ cd /usr/src/postgresql-17; \ ./configure --enable-debug --enable-cassert --without-icu CFLAGS="-O0 -g3 -fno-omit-frame-pointer"; \ make -j"$(nproc)"; \ - make install + make install; \ + make -C contrib/dblink -j"$(nproc)"; \ + make -C contrib/dblink install ENV PATH="/usr/local/pgsql/bin:${PATH}" ENV LD_LIBRARY_PATH="/usr/local/pgsql/lib:${LD_LIBRARY_PATH}" diff --git a/src/cloudsync.c b/src/cloudsync.c index 908e9c1..05546b6 100644 --- a/src/cloudsync.c +++ b/src/cloudsync.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "cloudsync.h" #include "lz4.h" @@ -55,8 +56,11 @@ #define CLOUDSYNC_PAYLOAD_VERSION_ORIGNAL 1 #define CLOUDSYNC_PAYLOAD_VERSION_1 CLOUDSYNC_PAYLOAD_VERSION_ORIGNAL #define CLOUDSYNC_PAYLOAD_VERSION_2 2 +#define CLOUDSYNC_PAYLOAD_VERSION_3 3 #define CLOUDSYNC_PAYLOAD_VERSION_LATEST CLOUDSYNC_PAYLOAD_VERSION_2 #define CLOUDSYNC_PAYLOAD_MIN_VERSION_WITH_CHECKSUM CLOUDSYNC_PAYLOAD_VERSION_2 +#define CLOUDSYNC_PAYLOAD_FRAGMENT_PREFIX "__cloudsync_frag_v1__:" +#define CLOUDSYNC_PAYLOAD_FRAGMENT_STALE_SECONDS (24*60*60) #ifndef MAX #define MAX(a, b) (((a)>(b))?(a):(b)) @@ -237,6 +241,7 @@ struct cloudsync_payload_context { size_t bused; uint64_t nrows; uint16_t ncols; + uint8_t version; }; #ifdef _MSC_VER @@ -3029,7 +3034,15 @@ size_t cloudsync_payload_context_size (size_t *header_size) { return sizeof(cloudsync_payload_context); } -void cloudsync_payload_header_init (cloudsync_payload_header *header, uint32_t expanded_size, uint16_t ncols, uint32_t nrows, uint64_t hash) { +uint64_t cloudsync_payload_context_nrows (cloudsync_payload_context *payload) { /* Accessor: number of rows appended to the payload context so far; 0 when payload is NULL. */ + return payload ? payload->nrows : 0; +} + +size_t cloudsync_payload_context_bused (cloudsync_payload_context *payload) { /* Accessor: bytes currently used in the payload buffer; 0 when payload is NULL. */ + return payload ? 
payload->bused : 0; +} + +void cloudsync_payload_header_init (cloudsync_payload_header *header, uint8_t version, uint32_t expanded_size, uint16_t ncols, uint32_t nrows, uint64_t hash) { memset(header, 0, sizeof(cloudsync_payload_header)); assert(sizeof(cloudsync_payload_header)==32); @@ -3037,7 +3050,7 @@ void cloudsync_payload_header_init (cloudsync_payload_header *header, uint32_t e sscanf(CLOUDSYNC_VERSION, "%d.%d.%d", &major, &minor, &patch); header->signature = htonl(CLOUDSYNC_PAYLOAD_SIGNATURE); - header->version = CLOUDSYNC_PAYLOAD_VERSION_2; + header->version = version; header->libversion[0] = (uint8_t)major; header->libversion[1] = (uint8_t)minor; header->libversion[2] = (uint8_t)patch; @@ -3073,6 +3086,320 @@ int cloudsync_payload_encode_step (cloudsync_payload_context *payload, cloudsync return DBRES_OK; } +static bool cloudsync_payload_append_raw (cloudsync_payload_context *payload, cloudsync_context *data, const char **fields, const size_t *field_sizes, int nfields, uint8_t version) { /* Appends one already-encoded row: copies the nfields buffers back to back at buffer + bused, then bumps bused and nrows. Returns false after cloudsync_set_error(..., DBRES_NOMEM) when the summed sizes would overflow size_t or cloudsync_payload_encode_check fails; returns true otherwise. */ + size_t needed = 0; + for (int i = 0; i < nfields; ++i) { + if (field_sizes[i] > SIZE_MAX - needed) { /* adding this size would wrap size_t */ + cloudsync_set_error(data, "cloudsync payload raw row too large", DBRES_NOMEM); + return false; + } + needed += field_sizes[i]; + } + if (!cloudsync_payload_encode_check(payload, needed)) { /* presumably grows the buffer so that needed more bytes fit - confirm against its definition */ + cloudsync_set_error(data, "Not enough memory to resize payload internal buffer", DBRES_NOMEM); + return false; + } + if (payload->nrows == 0) { /* the first row fixes the column count and payload version recorded in this context */ + payload->ncols = (uint16_t)nfields; + payload->version = version; + } + char *dst = payload->buffer + payload->bused; + for (int i = 0; i < nfields; ++i) { + memcpy(dst, fields[i], field_sizes[i]); + dst += field_sizes[i]; + } + payload->bused += needed; + ++payload->nrows; + return true; +} + +int cloudsync_payload_max_chunk_size (cloudsync_context *data) { /* Reads the CLOUDSYNC_KEY_PAYLOAD_MAX_CHUNK_SIZE setting; non-positive values fall back to CLOUDSYNC_PAYLOAD_CHUNK_DEFAULT_SIZE and the result is clamped to the range CLOUDSYNC_PAYLOAD_CHUNK_MIN_SIZE..INT_MAX. */ + int64_t value = dbutils_settings_get_int64_value(data, CLOUDSYNC_KEY_PAYLOAD_MAX_CHUNK_SIZE); + if (value <= 0) value = CLOUDSYNC_PAYLOAD_CHUNK_DEFAULT_SIZE; + if (value < 
CLOUDSYNC_PAYLOAD_CHUNK_MIN_SIZE) value = CLOUDSYNC_PAYLOAD_CHUNK_MIN_SIZE; + if (value > INT_MAX) value = INT_MAX; + return (int)value; +} + +int cloudsync_payload_fragment_target_size (cloudsync_context *data) { /* Chunk budget minus the payload header size and CLOUDSYNC_PAYLOAD_CHUNK_SAFETY_MARGIN, never less than 1024 bytes. */ + int max_size = cloudsync_payload_max_chunk_size(data); + int target = max_size - (int)sizeof(cloudsync_payload_header) - CLOUDSYNC_PAYLOAD_CHUNK_SAFETY_MARGIN; + if (target < 1024) target = 1024; + return target; +} + +static size_t cloudsync_payload_decimal_len_i64 (int64_t value) { /* Number of characters needed to print value in base 10, counting a leading minus sign for negatives. */ + size_t len = value < 0 ? 1 : 0; + uint64_t v = (value < 0) ? (uint64_t)(-(value + 1)) + 1u : (uint64_t)value; /* magnitude is computed as -(value + 1) + 1 so that INT64_MIN is never negated directly */ + do { + len++; + v /= 10u; + } while (v != 0); + return len; +} + +static bool cloudsync_payload_size_add (size_t *acc, size_t value) { /* Overflow-checked *acc += value; returns false and leaves *acc unchanged when the sum would exceed SIZE_MAX. */ + if (value > SIZE_MAX - *acc) return false; + *acc += value; + return true; +} + +int cloudsync_payload_fragment_count (int64_t total_size, int target_size) { /* Ceiling of total_size / target_size; returns 0 for non-positive arguments or when the count does not fit in an int. */ + if (total_size <= 0 || target_size <= 0) return 0; + uint64_t total = (uint64_t)total_size; + uint64_t target = (uint64_t)target_size; + uint64_t count = total / target + ((total % target) != 0); + if (count == 0 || count > INT_MAX) return 0; + return (int)count; +} + +int cloudsync_payload_fragment_data_size (cloudsync_context *data, + const char *tbl, int tbl_len, + const void *pk, int pk_len, + const char *col_name, int col_name_len, + int64_t col_version, int64_t db_version, + const void *site_id, int site_id_len, + int64_t cl, int64_t seq, + int64_t total_size, + int part_index, int part_count) { /* Largest fragment length, in bytes, whose encoded BLOB still fits in cloudsync_payload_max_chunk_size(data) next to the payload header and the eight other encoded row fields. Returns 0 on invalid arguments or when nothing fits. Only the lengths of pk and site_id are used, not their contents. */ + UNUSED_PARAMETER(pk); + UNUSED_PARAMETER(site_id); + if (tbl_len < 0 && tbl) tbl_len = (int)strlen(tbl); /* a negative length means the string is NUL-terminated */ + if (col_name_len < 0 && col_name) col_name_len = (int)strlen(col_name); + if (tbl_len < 0 || pk_len < 0 || col_name_len < 0 || site_id_len < 0 || total_size < 0 || part_index < 0 || part_count <= 0) { + return 0; + } + + size_t fixed = sizeof(cloudsync_payload_header); + size_t frag_col_len = strlen(CLOUDSYNC_PAYLOAD_FRAGMENT_PREFIX) + 32 + 1 + 16 + 1 + 
cloudsync_payload_decimal_len_i64(part_index) + 1 + cloudsync_payload_decimal_len_i64(part_count) + 1 + cloudsync_payload_decimal_len_i64(total_size) + 1 + (size_t)col_name_len; /* length of the synthetic fragment column name: prefix, 32-char value id, 16-char checksum, part index, part count, total size and the original column name, with one separator character after each of the five middle items; mirrors the format string used by cloudsync_payload_encode_fragment_step */ + size_t sizes[] = { /* encoded sizes of every row field except the fragment BLOB itself */ + pk_encode_raw_size(DBTYPE_TEXT, tbl_len), + pk_encode_raw_size(DBTYPE_BLOB, pk_len), + pk_encode_raw_size(DBTYPE_TEXT, (int64_t)frag_col_len), + pk_encode_raw_size(DBTYPE_INTEGER, col_version), + pk_encode_raw_size(DBTYPE_INTEGER, db_version), + pk_encode_raw_size(DBTYPE_BLOB, site_id_len), + pk_encode_raw_size(DBTYPE_INTEGER, cl), + pk_encode_raw_size(DBTYPE_INTEGER, seq) + }; + for (size_t i = 0; i < sizeof(sizes) / sizeof(sizes[0]); ++i) { + if (sizes[i] == SIZE_MAX || !cloudsync_payload_size_add(&fixed, sizes[i])) return 0; /* SIZE_MAX is treated here as an error marker from pk_encode_raw_size - confirm against its definition */ + } + + int max_size = cloudsync_payload_max_chunk_size(data); + if (fixed >= (size_t)max_size) return 0; + + size_t candidate = (size_t)max_size - fixed; + if (candidate > INT_MAX) candidate = INT_MAX; + while (candidate > 0) { /* the encoded BLOB is larger than its data (type byte plus length bytes), so shrink candidate by the overshoot until the encoded form fits in the remaining budget */ + size_t frag_size = pk_encode_raw_size(DBTYPE_BLOB, (int64_t)candidate); + if (frag_size == SIZE_MAX) return 0; + if (fixed <= (size_t)max_size && frag_size <= (size_t)max_size - fixed) return (int)candidate; + size_t total = fixed + frag_size; + size_t over = total > (size_t)max_size ? 
total - (size_t)max_size : 1; + if (candidate <= over) return 0; + candidate -= over; + } + return 0; +} + +int cloudsync_payload_encoded_value_header (dbvalue_t *value, char *header, int header_cap, int64_t *payload_len) { /* Writes only the encoding prefix of a TEXT or BLOB value (one type byte followed by the length bytes, most significant first) into header and stores the value byte length in *payload_len. Returns the prefix length, 0 for any other value type (with *payload_len set to 0), or -1 on bad arguments, a negative length, or a prefix larger than header_cap. */ + if (!value || !header || header_cap <= 0 || !payload_len) return -1; + int type = database_value_type(value); + *payload_len = 0; + if (type != DBTYPE_TEXT && type != DBTYPE_BLOB) return 0; + int64_t len = database_value_bytes(value); + if (len < 0) return -1; + *payload_len = len; + size_t total = pk_encode_raw_size(type, len); + if (total == SIZE_MAX || total < (size_t)len || total - (size_t)len > (size_t)header_cap) return -1; /* total - len is the prefix size that has to fit in header */ + if (type == DBTYPE_TEXT) { /* NOTE(review): the two branches differ only in the type constant OR-ed into the type byte */ + size_t nbytes = pk_encode_raw_size(type, len) - (size_t)len - 1; + uint8_t type_byte = (uint8_t)((nbytes << 3) | DBTYPE_TEXT); + header[0] = (char)type_byte; + for (size_t i = 0; i < nbytes; i++) header[1 + i] = (uint8_t)(((uint64_t)len >> (8 * (nbytes - 1 - i))) & 0xFFu); + return (int)(1 + nbytes); + } else { + size_t nbytes = pk_encode_raw_size(type, len) - (size_t)len - 1; + uint8_t type_byte = (uint8_t)((nbytes << 3) | DBTYPE_BLOB); + header[0] = (char)type_byte; + for (size_t i = 0; i < nbytes; i++) header[1 + i] = (uint8_t)(((uint64_t)len >> (8 * (nbytes - 1 - i))) & 0xFFu); + return (int)(1 + nbytes); + } +} + +uint64_t cloudsync_payload_encoded_value_checksum (dbvalue_t *value) { /* Checksum of the encoded form of value. Values that are neither TEXT nor BLOB are encoded with pk_encode and hashed with pk_checksum. TEXT and BLOB values hash the encoding prefix with pk_checksum and then fold in the raw bytes, without building the full encoded buffer. Returns 0 for a NULL value, on allocation failure, or when the prefix cannot be built. */ + if (!value) return 0; + int type = database_value_type(value); + if (type != DBTYPE_TEXT && type != DBTYPE_BLOB) { + size_t len = pk_encode_size(&value, 1, 0, -1); + char stack[32]; /* small encodings use this stack buffer; larger ones are heap allocated below */ + char *buf = stack; + if (len > sizeof(stack)) buf = cloudsync_memory_alloc((uint64_t)len); + if (!buf) return 0; + size_t bsize = len; + pk_encode(&value, 1, buf, false, &bsize, -1); + uint64_t h = pk_checksum(buf, bsize); + if (buf != stack) cloudsync_memory_free(buf); + return h; + } + char header[16]; + int64_t payload_len = 0; + int header_len = cloudsync_payload_encoded_value_header(value, header, sizeof(header), 
&payload_len); + if (header_len <= 0) return 0; + uint64_t h = pk_checksum(header, (size_t)header_len); + const char *p = (const char *)database_value_blob(value); + if (p && payload_len > 0) { /* same xor-then-multiply step as cloudsync_checksum_update; presumably this continues the pk_checksum hash so the result equals hashing the whole encoded value - confirm against pk_checksum */ + const uint8_t *bytes = (const uint8_t *)p; + for (int64_t i = 0; i < payload_len; ++i) { + h ^= bytes[i]; + h *= 1099511628211ULL; + } + } + return h; +} + +static uint64_t cloudsync_checksum_update (uint64_t h, const void *data, size_t len) { /* Folds len bytes into the running hash h: xor each byte in, then multiply by 1099511628211 (the 64-bit FNV prime). Returns the updated hash. */ + const uint8_t *p = (const uint8_t *)data; + for (size_t i = 0; i < len; ++i) { + h ^= p[i]; + h *= 1099511628211ULL; + } + return h; +} + +static uint64_t cloudsync_checksum_update_i64 (uint64_t h, int64_t value) { /* Feeds the eight bytes of value into the hash most significant byte first, so the result does not depend on host byte order. */ + uint64_t v = (uint64_t)value; + for (int i = 7; i >= 0; --i) { + uint8_t b = (uint8_t)((v >> (8 * i)) & 0xffu); + h = cloudsync_checksum_update(h, &b, 1); + } + return h; +} + +static void cloudsync_payload_fragment_value_id (char out[33], + const char *tbl, int tbl_len, + const void *pk, int pk_len, + const char *col_name, int col_name_len, + int64_t col_version, int64_t db_version, + const void *site_id, int site_id_len, + int64_t cl, int64_t seq, + uint64_t value_checksum, + int64_t total_size) { /* Writes a 32 hex digit, NUL-terminated identifier for one fragmented value into out. It is built from two 64-bit hashes (h1, h2) over the row metadata, the value checksum and the total size; h1 and h2 start from different seeds and consume the fields in different orders. */ + uint64_t h1 = 14695981039346656037ULL; + uint64_t h2 = 1099511628211ULL; + const char sep = '\x1f'; /* separator byte mixed into h1 between the variable-length fields */ + + h1 = cloudsync_checksum_update(h1, tbl, (size_t)tbl_len); + h1 = cloudsync_checksum_update(h1, &sep, 1); + h1 = cloudsync_checksum_update(h1, pk, (size_t)pk_len); + h1 = cloudsync_checksum_update(h1, &sep, 1); + h1 = cloudsync_checksum_update(h1, col_name, (size_t)col_name_len); + h1 = cloudsync_checksum_update(h1, &sep, 1); + h1 = cloudsync_checksum_update(h1, site_id, (size_t)site_id_len); + h1 = cloudsync_checksum_update_i64(h1, col_version); + h1 = cloudsync_checksum_update_i64(h1, db_version); + h1 = cloudsync_checksum_update_i64(h1, cl); + h1 = cloudsync_checksum_update_i64(h1, seq); + h1 = cloudsync_checksum_update_i64(h1, (int64_t)value_checksum); + h1 = cloudsync_checksum_update_i64(h1, total_size); + + h2 = 
cloudsync_checksum_update_i64(h2, total_size); + h2 = cloudsync_checksum_update_i64(h2, (int64_t)value_checksum); + h2 = cloudsync_checksum_update(h2, site_id, (size_t)site_id_len); + h2 = cloudsync_checksum_update(h2, col_name, (size_t)col_name_len); + h2 = cloudsync_checksum_update(h2, pk, (size_t)pk_len); + h2 = cloudsync_checksum_update(h2, tbl, (size_t)tbl_len); + h2 = cloudsync_checksum_update_i64(h2, seq); + h2 = cloudsync_checksum_update_i64(h2, cl); + h2 = cloudsync_checksum_update_i64(h2, db_version); + h2 = cloudsync_checksum_update_i64(h2, col_version); + + snprintf(out, 33, "%016" PRIx64 "%016" PRIx64, h1, h2); +} + +int cloudsync_payload_encode_fragment_step (cloudsync_payload_context *payload, cloudsync_context *data, + const char *tbl, int tbl_len, + const void *pk, int pk_len, + const char *col_name, int col_name_len, + const void *fragment, int fragment_len, + int64_t col_version, int64_t db_version, + const void *site_id, int site_id_len, + int64_t cl, int64_t seq, + uint64_t value_checksum, + int64_t total_size, + int part_index, int part_count) { + if (!payload || !data || !tbl || !pk || !col_name || !fragment || !site_id) return DBRES_MISUSE; + if (tbl_len < 0) tbl_len = (int)strlen(tbl); + if (col_name_len < 0) col_name_len = (int)strlen(col_name); + if (tbl_len < 0 || pk_len < 0 || col_name_len < 0 || fragment_len <= 0 || site_id_len < 0 || + total_size <= 0 || part_index < 0 || part_count <= 0 || part_index >= part_count) { + return DBRES_MISUSE; + } + + char value_id[33]; + char checksum_hex[17]; + cloudsync_payload_fragment_value_id(value_id, tbl, tbl_len, pk, pk_len, col_name, col_name_len, + col_version, db_version, site_id, site_id_len, cl, seq, + value_checksum, total_size); + snprintf(checksum_hex, sizeof(checksum_hex), "%016" PRIx64, value_checksum); + + char *frag_col = cloudsync_memory_mprintf("%s%s:%s:%d:%d:%" PRId64 ":%.*s", + CLOUDSYNC_PAYLOAD_FRAGMENT_PREFIX, + value_id, checksum_hex, part_index, part_count, total_size, + 
col_name_len, col_name); + if (!frag_col) return DBRES_NOMEM; + + size_t sizes[9] = {0}; + sizes[0] = pk_encode_raw_size(DBTYPE_TEXT, tbl_len); + sizes[1] = pk_encode_raw_size(DBTYPE_BLOB, pk_len); + sizes[2] = pk_encode_raw_size(DBTYPE_TEXT, (int64_t)strlen(frag_col)); + sizes[3] = pk_encode_raw_size(DBTYPE_BLOB, fragment_len); + sizes[4] = pk_encode_raw_size(DBTYPE_INTEGER, col_version); + sizes[5] = pk_encode_raw_size(DBTYPE_INTEGER, db_version); + sizes[6] = pk_encode_raw_size(DBTYPE_BLOB, site_id_len); + sizes[7] = pk_encode_raw_size(DBTYPE_INTEGER, cl); + sizes[8] = pk_encode_raw_size(DBTYPE_INTEGER, seq); + for (int i = 0; i < 9; ++i) { + if (sizes[i] == SIZE_MAX) { cloudsync_memory_free(frag_col); return DBRES_NOMEM; } + } + + char stack[9][64]; + char *fields[9] = {0}; + for (int i = 0; i < 9; ++i) { + fields[i] = sizes[i] <= sizeof(stack[0]) ? stack[i] : cloudsync_memory_alloc((uint64_t)sizes[i]); + if (!fields[i]) { + for (int j = 0; j < i; ++j) if (fields[j] && (fields[j] < (char *)stack || fields[j] >= (char *)(stack + 9))) cloudsync_memory_free(fields[j]); + cloudsync_memory_free(frag_col); + return DBRES_NOMEM; + } + } + + pk_encode_raw_text(fields[0], tbl, (size_t)tbl_len); + pk_encode_raw_blob(fields[1], pk, (size_t)pk_len); + pk_encode_raw_text(fields[2], frag_col, strlen(frag_col)); + pk_encode_raw_blob(fields[3], fragment, (size_t)fragment_len); + pk_encode_raw_int(fields[4], col_version); + pk_encode_raw_int(fields[5], db_version); + pk_encode_raw_blob(fields[6], site_id, (size_t)site_id_len); + pk_encode_raw_int(fields[7], cl); + pk_encode_raw_int(fields[8], seq); + + const char *cfields[9]; + for (int i = 0; i < 9; ++i) cfields[i] = fields[i]; + bool ok = cloudsync_payload_append_raw(payload, data, cfields, sizes, 9, CLOUDSYNC_PAYLOAD_VERSION_3); + + for (int i = 0; i < 9; ++i) { + if (!(fields[i] >= (char *)stack && fields[i] < (char *)(stack + 9))) cloudsync_memory_free(fields[i]); + } + cloudsync_memory_free(frag_col); + return ok ? 
DBRES_OK : cloudsync_errcode(data); +} + int cloudsync_payload_encode_final (cloudsync_payload_context *payload, cloudsync_context *data) { DEBUG_FUNCTION("cloudsync_payload_encode_final"); @@ -3122,7 +3449,8 @@ int cloudsync_payload_encode_final (cloudsync_payload_context *payload, cloudsyn // setup payload header cloudsync_payload_header header = {0}; uint32_t expanded_size = (use_uncompressed_buffer) ? 0 : real_buffer_size; - cloudsync_payload_header_init(&header, expanded_size, payload->ncols, (uint32_t)payload->nrows, data->schema_hash); + uint8_t version = payload->version ? payload->version : CLOUDSYNC_PAYLOAD_VERSION_LATEST; + cloudsync_payload_header_init(&header, version, expanded_size, payload->ncols, (uint32_t)payload->nrows, data->schema_hash); // if compression fails or if compressed size is bigger than original buffer, then use the uncompressed buffer if (use_uncompressed_buffer) { @@ -3208,6 +3536,435 @@ static int cloudsync_payload_decode_callback (void *xdata, int index, int type, return rc; } +typedef struct { + const char *tbl; + int64_t tbl_len; + const void *pk; + int64_t pk_len; + const char *col_name; + int64_t col_name_len; + const void *col_value; + int64_t col_value_len; + int64_t col_version; + int64_t db_version; + const void *site_id; + int64_t site_id_len; + int64_t cl; + int64_t seq; +} cloudsync_payload_fragment_row; + +static int cloudsync_payload_fragment_decode_callback (void *xdata, int index, int type, int64_t ival, double dval, char *pval) { + UNUSED_PARAMETER(dval); + cloudsync_payload_fragment_row *row = (cloudsync_payload_fragment_row *)xdata; + switch (index) { + case CLOUDSYNC_PK_INDEX_TBL: + if (type != DBTYPE_TEXT) return DBRES_ERROR; + row->tbl = pval; row->tbl_len = ival; + break; + case CLOUDSYNC_PK_INDEX_PK: + if (type != DBTYPE_BLOB) return DBRES_ERROR; + row->pk = pval; row->pk_len = ival; + break; + case CLOUDSYNC_PK_INDEX_COLNAME: + if (type != DBTYPE_TEXT) return DBRES_ERROR; + row->col_name = pval; 
row->col_name_len = ival; + break; + case CLOUDSYNC_PK_INDEX_COLVALUE: + if (type != DBTYPE_BLOB) return DBRES_ERROR; + row->col_value = pval; row->col_value_len = ival; + break; + case CLOUDSYNC_PK_INDEX_COLVERSION: + if (type != DBTYPE_INTEGER) return DBRES_ERROR; + row->col_version = ival; + break; + case CLOUDSYNC_PK_INDEX_DBVERSION: + if (type != DBTYPE_INTEGER) return DBRES_ERROR; + row->db_version = ival; + break; + case CLOUDSYNC_PK_INDEX_SITEID: + if (type != DBTYPE_BLOB) return DBRES_ERROR; + row->site_id = pval; row->site_id_len = ival; + break; + case CLOUDSYNC_PK_INDEX_CL: + if (type != DBTYPE_INTEGER) return DBRES_ERROR; + row->cl = ival; + break; + case CLOUDSYNC_PK_INDEX_SEQ: + if (type != DBTYPE_INTEGER) return DBRES_ERROR; + row->seq = ival; + break; + } + return DBRES_OK; +} + +static bool cloudsync_payload_is_hex (const char *value, size_t len) { + for (size_t i = 0; i < len; ++i) { + char c = value[i]; + if (!((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))) return false; + } + return true; +} + +static bool cloudsync_payload_parse_u64_segment (const char *start, const char *end, uint64_t max_value, uint64_t *out, const char **next) { + if (!start || !end || start >= end) return false; + uint64_t value = 0; + const char *p = start; + while (p < end && *p >= '0' && *p <= '9') { + uint64_t digit = (uint64_t)(*p - '0'); + if (value > (max_value - digit) / 10u) return false; + value = value * 10u + digit; + p++; + } + if (p == start || p >= end || *p != ':') return false; + *out = value; + *next = p + 1; + return true; +} + +static bool cloudsync_payload_fragment_parse_colname (const char *col_name, int64_t col_name_len, + char *value_id, size_t value_id_len, + char *checksum_hex, size_t checksum_hex_len, + int *part_index, int *part_count, + int64_t *total_size, + const char **base_col, int64_t *base_col_len) { + size_t prefix_len = strlen(CLOUDSYNC_PAYLOAD_FRAGMENT_PREFIX); + if (!col_name || col_name_len <= 
(int64_t)prefix_len) return false; + if (strncmp(col_name, CLOUDSYNC_PAYLOAD_FRAGMENT_PREFIX, prefix_len) != 0) return false; + + const char *p = col_name + prefix_len; + const char *end = col_name + col_name_len; + const char *sep = memchr(p, ':', (size_t)(end - p)); + if (!sep || (size_t)(sep - p) + 1 > value_id_len) return false; + if ((sep - p) != 32 || !cloudsync_payload_is_hex(p, (size_t)(sep - p))) return false; + memcpy(value_id, p, (size_t)(sep - p)); + value_id[sep - p] = 0; + + p = sep + 1; + sep = memchr(p, ':', (size_t)(end - p)); + if (!sep || (size_t)(sep - p) + 1 > checksum_hex_len) return false; + if ((sep - p) != 16 || !cloudsync_payload_is_hex(p, (size_t)(sep - p))) return false; + memcpy(checksum_hex, p, (size_t)(sep - p)); + checksum_hex[sep - p] = 0; + + const char *next = NULL; + uint64_t parsed = 0; + if (!cloudsync_payload_parse_u64_segment(sep + 1, end, INT_MAX, &parsed, &next)) return false; + *part_index = (int)parsed; + + if (!cloudsync_payload_parse_u64_segment(next, end, INT_MAX, &parsed, &next)) return false; + *part_count = (int)parsed; + + if (!cloudsync_payload_parse_u64_segment(next, end, INT64_MAX, &parsed, &next)) return false; + *total_size = (int64_t)parsed; + + *base_col = next; + *base_col_len = end - *base_col; + return (*part_count > 0 && *part_index < *part_count && *base_col_len > 0); +} + +typedef struct { + dbvm_t *vm; + int param_index; +} cloudsync_payload_bind_param_context; + +static int cloudsync_payload_bind_param_callback (void *xdata, int index, int type, int64_t ival, double dval, char *pval) { + UNUSED_PARAMETER(index); + cloudsync_payload_bind_param_context *ctx = (cloudsync_payload_bind_param_context *)xdata; + switch (type) { + case DBTYPE_INTEGER: return databasevm_bind_int(ctx->vm, ctx->param_index, ival); + case DBTYPE_FLOAT: return databasevm_bind_double(ctx->vm, ctx->param_index, dval); + case DBTYPE_NULL: return databasevm_bind_null(ctx->vm, ctx->param_index); + case DBTYPE_TEXT: return 
databasevm_bind_text(ctx->vm, ctx->param_index, pval, (int)ival); + case DBTYPE_BLOB: return databasevm_bind_blob(ctx->vm, ctx->param_index, pval, (uint64_t)ival); + } + return DBRES_MISUSE; +} + +static int cloudsync_payload_fragments_cleanup_stale (cloudsync_context *data) { + dbvm_t *vm = NULL; + int rc = databasevm_prepare(data, SQL_PAYLOAD_FRAGMENTS_CLEANUP_STALE, &vm, 0); + if (rc != DBRES_OK) return rc; + int64_t cutoff = (int64_t)time(NULL) - CLOUDSYNC_PAYLOAD_FRAGMENT_STALE_SECONDS; + rc = databasevm_bind_int(vm, 1, cutoff); + if (rc == DBRES_OK) rc = databasevm_step(vm); + databasevm_finalize(vm); + return (rc == DBRES_DONE) ? DBRES_OK : rc; +} + +static int cloudsync_payload_apply_single_decoded_row (cloudsync_context *data, + const char *tbl, size_t tbl_len, + const char *pk, size_t pk_len, + const char *col_name, size_t col_name_len, + const char *encoded_value, size_t encoded_value_len, + int64_t col_version, int64_t db_version, + const char *site_id, size_t site_id_len, + int64_t cl, int64_t seq, + int *pnrows) { + int rc = DBRES_OK; + dbvm_t *vm = NULL; + bool in_savepoint = false; + merge_pending_batch batch = {0}; + + rc = databasevm_prepare(data, SQL_CHANGES_INSERT_ROW, &vm, 0); + if (rc != DBRES_OK) return cloudsync_set_error(data, "Error on cloudsync_payload_apply: error while compiling SQL statement", rc); + + rc = databasevm_bind_text(vm, 1, tbl, (int)tbl_len); + if (rc == DBRES_OK) rc = databasevm_bind_blob(vm, 2, pk, (uint64_t)pk_len); + if (rc == DBRES_OK) rc = databasevm_bind_text(vm, 3, col_name, (int)col_name_len); + if (rc == DBRES_OK) { + if (data->skip_decode_idx == CLOUDSYNC_PK_INDEX_COLVALUE) { + rc = databasevm_bind_blob(vm, 4, encoded_value, (uint64_t)encoded_value_len); + } else { + size_t seek = 0; + cloudsync_payload_bind_param_context bind_ctx = {.vm = vm, .param_index = 4}; + int res = pk_decode((char *)encoded_value, encoded_value_len, 1, &seek, -1, cloudsync_payload_bind_param_callback, &bind_ctx); + if (res == -1 || seek 
!= encoded_value_len) rc = cloudsync_set_error(data, "Error on cloudsync_payload_apply: invalid v3 encoded value", DBRES_MISUSE); + } + } + if (rc == DBRES_OK) rc = databasevm_bind_int(vm, 5, col_version); + if (rc == DBRES_OK) rc = databasevm_bind_int(vm, 6, db_version); + if (rc == DBRES_OK) rc = databasevm_bind_blob(vm, 7, site_id, (uint64_t)site_id_len); + if (rc == DBRES_OK) rc = databasevm_bind_int(vm, 8, cl); + if (rc == DBRES_OK) rc = databasevm_bind_int(vm, 9, seq); + if (rc != DBRES_OK) goto cleanup; + + if (!database_in_transaction(data)) { + rc = database_begin_savepoint(data, "cloudsync_payload_apply"); + if (rc != DBRES_OK) goto cleanup; + in_savepoint = true; + } + + data->pending_batch = &batch; + rc = databasevm_step(vm); + if (rc == DBRES_DONE) rc = DBRES_OK; + if (rc != DBRES_OK) { + cloudsync_set_dberror(data); + goto cleanup; + } + + rc = merge_flush_pending(data); + if (rc != DBRES_OK) goto cleanup; + data->pending_batch = NULL; + + if (in_savepoint) { + rc = database_commit_savepoint(data, "cloudsync_payload_apply"); + in_savepoint = false; + if (rc != DBRES_OK) goto cleanup; + } + + int dbversion = dbutils_settings_get_int_value(data, CLOUDSYNC_KEY_CHECK_DBVERSION); + int seq_setting = dbutils_settings_get_int_value(data, CLOUDSYNC_KEY_CHECK_SEQ); + if (db_version >= dbversion) { + char buf[256]; + snprintf(buf, sizeof(buf), "%" PRId64, db_version); + dbutils_settings_set_key_value(data, CLOUDSYNC_KEY_CHECK_DBVERSION, buf); + if (seq != seq_setting) { + snprintf(buf, sizeof(buf), "%" PRId64, seq); + dbutils_settings_set_key_value(data, CLOUDSYNC_KEY_CHECK_SEQ, buf); + } + } + + if (pnrows) *pnrows += 1; + +cleanup: + if (rc != DBRES_OK && in_savepoint) database_rollback_savepoint(data, "cloudsync_payload_apply"); + data->pending_batch = NULL; + merge_pending_free_entries(&batch); + if (batch.cached_vm) databasevm_finalize(batch.cached_vm); + if (batch.cached_col_names) cloudsync_memory_free(batch.cached_col_names); + if (batch.entries) 
cloudsync_memory_free(batch.entries); + if (vm) databasevm_finalize(vm); + return rc; +} + +static int cloudsync_payload_apply_reassembled_fragment (cloudsync_context *data, const char *value_id, const char *expected_checksum_hex, int *pnrows) { + int rc = DBRES_OK; + dbvm_t *vm = NULL; + char *value = NULL; + char *tbl = NULL, *col_name = NULL; + char *pk = NULL, *site_id = NULL; + size_t tbl_len = 0, col_name_len = 0, pk_len = 0, site_id_len = 0; + int64_t col_version = 0, db_version = 0, cl = 0, seq = 0; + int64_t total_size = 0, copied = 0; + + rc = databasevm_prepare(data, SQL_PAYLOAD_FRAGMENTS_COUNT, &vm, 0); + if (rc != DBRES_OK) return rc; + rc = databasevm_bind_text(vm, 1, value_id, -1); + if (rc != DBRES_OK) { databasevm_finalize(vm); return rc; } + rc = databasevm_step(vm); + if (rc != DBRES_ROW) { databasevm_finalize(vm); return DBRES_OK; } + int64_t have = database_column_int(vm, 0); + int64_t part_count_min = database_column_int(vm, 1); + int64_t part_count_max = database_column_int(vm, 2); + int64_t total_size_min = database_column_int(vm, 3); + int64_t total_size_max = database_column_int(vm, 4); + const char *checksum_min = database_column_text(vm, 5); + const char *checksum_max = database_column_text(vm, 6); + char checksum_min_copy[32] = {0}; + char checksum_max_copy[32] = {0}; + if (checksum_min) snprintf(checksum_min_copy, sizeof(checksum_min_copy), "%s", checksum_min); + if (checksum_max) snprintf(checksum_max_copy, sizeof(checksum_max_copy), "%s", checksum_max); + int64_t part_index_min = database_column_int(vm, 7); + int64_t part_index_max = database_column_int(vm, 8); + databasevm_finalize(vm); + vm = NULL; + if (have <= 0 || part_count_min <= 0 || have < part_count_max) return DBRES_OK; + if (part_count_min != part_count_max || total_size_min != total_size_max || !checksum_min_copy[0] || !checksum_max_copy[0] || + strcmp(checksum_min_copy, checksum_max_copy) != 0 || strcmp(checksum_min_copy, expected_checksum_hex) != 0 || + part_index_min 
!= 0 || part_index_max != part_count_max - 1 || have != part_count_max) { + return cloudsync_set_error(data, "Error on cloudsync_payload_apply: inconsistent v3 fragments", DBRES_MISUSE); + } + total_size = total_size_max; + + value = cloudsync_memory_alloc((uint64_t)total_size); + if (!value) return DBRES_NOMEM; + + rc = databasevm_prepare(data, SQL_PAYLOAD_FRAGMENTS_SELECT, &vm, 0); + if (rc != DBRES_OK) goto cleanup; + rc = databasevm_bind_text(vm, 1, value_id, -1); + if (rc != DBRES_OK) goto cleanup; + + uint64_t checksum = 14695981039346656037ULL; + while ((rc = databasevm_step(vm)) == DBRES_ROW) { + size_t frag_len = 0; + const char *frag = database_column_blob(vm, 0, &frag_len); + if (copied + (int64_t)frag_len > total_size) { rc = DBRES_MISUSE; goto cleanup; } + memcpy(value + copied, frag, frag_len); + checksum = cloudsync_checksum_update(checksum, frag, frag_len); + copied += (int64_t)frag_len; + + if (!tbl) { + const char *t = database_column_text(vm, 1); + const char *c = database_column_text(vm, 3); + size_t pkl = 0, sidl = 0; + const char *p = database_column_blob(vm, 2, &pkl); + const char *sid = database_column_blob(vm, 6, &sidl); + tbl_len = (size_t)database_column_bytes(vm, 1); + col_name_len = (size_t)database_column_bytes(vm, 3); + pk_len = pkl; + site_id_len = sidl; + tbl = cloudsync_memory_alloc((uint64_t)tbl_len); + col_name = cloudsync_memory_alloc((uint64_t)col_name_len); + pk = cloudsync_memory_alloc((uint64_t)pk_len); + site_id = cloudsync_memory_alloc((uint64_t)site_id_len); + if (!tbl || !col_name || !pk || !site_id) { rc = DBRES_NOMEM; goto cleanup; } + memcpy(tbl, t, tbl_len); + memcpy(col_name, c, col_name_len); + memcpy(pk, p, pk_len); + memcpy(site_id, sid, site_id_len); + col_version = database_column_int(vm, 4); + db_version = database_column_int(vm, 5); + cl = database_column_int(vm, 7); + seq = database_column_int(vm, 8); + } else { + size_t pkl = 0, sidl = 0; + const char *t = database_column_text(vm, 1); + const char *c = 
database_column_text(vm, 3); + const char *p = database_column_blob(vm, 2, &pkl); + const char *sid = database_column_blob(vm, 6, &sidl); + if ((size_t)database_column_bytes(vm, 1) != tbl_len || memcmp(tbl, t, tbl_len) != 0 || + pkl != pk_len || memcmp(pk, p, pk_len) != 0 || + (size_t)database_column_bytes(vm, 3) != col_name_len || memcmp(col_name, c, col_name_len) != 0 || + database_column_int(vm, 4) != col_version || + database_column_int(vm, 5) != db_version || + sidl != site_id_len || memcmp(site_id, sid, site_id_len) != 0 || + database_column_int(vm, 7) != cl || + database_column_int(vm, 8) != seq) { + rc = DBRES_MISUSE; + goto cleanup; + } + } + } + if (rc == DBRES_DONE) rc = DBRES_OK; + if (rc != DBRES_OK) goto cleanup; + if (copied != total_size) { rc = DBRES_MISUSE; goto cleanup; } + char checksum_hex[17]; + snprintf(checksum_hex, sizeof(checksum_hex), "%016" PRIx64, checksum); + if (strcmp(checksum_hex, expected_checksum_hex) != 0) { rc = DBRES_MISUSE; goto cleanup; } + databasevm_finalize(vm); + vm = NULL; + + rc = cloudsync_payload_apply_single_decoded_row(data, tbl, tbl_len, pk, pk_len, col_name, col_name_len, + value, (size_t)total_size, col_version, db_version, + site_id, site_id_len, cl, seq, pnrows); + if (rc != DBRES_OK) goto cleanup; + + rc = databasevm_prepare(data, SQL_PAYLOAD_FRAGMENTS_DELETE, &vm, 0); + if (rc == DBRES_OK) { + databasevm_bind_text(vm, 1, value_id, -1); + int step_rc = databasevm_step(vm); + if (step_rc == DBRES_DONE) rc = DBRES_OK; + } + +cleanup: + if (vm) databasevm_finalize(vm); + if (value) cloudsync_memory_free(value); + if (tbl) cloudsync_memory_free(tbl); + if (col_name) cloudsync_memory_free(col_name); + if (pk) cloudsync_memory_free(pk); + if (site_id) cloudsync_memory_free(site_id); + return rc; +} + +static int cloudsync_payload_apply_fragment_row (cloudsync_context *data, cloudsync_payload_fragment_row *row, int *pnrows) { + char value_id[64]; + char checksum_hex[17]; + int part_index = 0, part_count = 0; + 
int64_t total_size = 0; + const char *base_col = NULL; + int64_t base_col_len = 0; + if (!row || !row->tbl || row->tbl_len <= 0 || !row->pk || row->pk_len <= 0 || + !row->col_name || row->col_name_len <= 0 || !row->col_value || row->col_value_len <= 0 || + !row->site_id || row->site_id_len <= 0) { + return cloudsync_set_error(data, "Error on cloudsync_payload_apply: invalid v3 payload row", DBRES_MISUSE); + } + if (!cloudsync_payload_fragment_parse_colname(row->col_name, row->col_name_len, value_id, sizeof(value_id), + checksum_hex, sizeof(checksum_hex), + &part_index, &part_count, &total_size, &base_col, &base_col_len)) { + return cloudsync_set_error(data, "Error on cloudsync_payload_apply: invalid v3 fragment metadata", DBRES_MISUSE); + } + + uint64_t value_checksum = strtoull(checksum_hex, NULL, 16); + char expected_value_id[33]; + cloudsync_payload_fragment_value_id(expected_value_id, row->tbl, (int)row->tbl_len, row->pk, (int)row->pk_len, + base_col, (int)base_col_len, row->col_version, row->db_version, + row->site_id, (int)row->site_id_len, row->cl, row->seq, + value_checksum, total_size); + if (strcmp(value_id, expected_value_id) != 0) { + return cloudsync_set_error(data, "Error on cloudsync_payload_apply: invalid v3 fragment identity", DBRES_MISUSE); + } + + int rc = database_exec(data, SQL_PAYLOAD_FRAGMENTS_CREATE_TABLE); + if (rc != DBRES_OK) return rc; + rc = cloudsync_payload_fragments_cleanup_stale(data); + if (rc != DBRES_OK) return rc; + + dbvm_t *vm = NULL; + rc = databasevm_prepare(data, SQL_PAYLOAD_FRAGMENTS_UPSERT, &vm, 0); + if (rc != DBRES_OK) return rc; + databasevm_bind_text(vm, 1, value_id, -1); + databasevm_bind_int(vm, 2, part_index); + databasevm_bind_int(vm, 3, part_count); + databasevm_bind_int(vm, 4, total_size); + databasevm_bind_text(vm, 5, checksum_hex, -1); + databasevm_bind_int(vm, 6, (int64_t)time(NULL)); + databasevm_bind_text(vm, 7, row->tbl, (int)row->tbl_len); + databasevm_bind_blob(vm, 8, row->pk, (uint64_t)row->pk_len); + 
databasevm_bind_text(vm, 9, base_col, (int)base_col_len); + databasevm_bind_int(vm, 10, row->col_version); + databasevm_bind_int(vm, 11, row->db_version); + databasevm_bind_blob(vm, 12, row->site_id, (uint64_t)row->site_id_len); + databasevm_bind_int(vm, 13, row->cl); + databasevm_bind_int(vm, 14, row->seq); + databasevm_bind_blob(vm, 15, row->col_value, (uint64_t)row->col_value_len); + rc = databasevm_step(vm); + databasevm_finalize(vm); + if (rc == DBRES_DONE) rc = DBRES_OK; + if (rc != DBRES_OK) return rc; + + return cloudsync_payload_apply_reassembled_fragment(data, value_id, checksum_hex, pnrows); +} + // #ifndef CLOUDSYNC_OMIT_RLS_VALIDATION int cloudsync_payload_apply (cloudsync_context *data, const char *payload, int blen, int *pnrows) { @@ -3243,7 +4000,7 @@ int cloudsync_payload_apply (cloudsync_context *data, const char *payload, int b if (header.schema_hash != data->schema_hash) { if (!database_check_schema_hash(data, header.schema_hash)) { char buffer[1024]; - snprintf(buffer, sizeof(buffer), "Cannot apply the received payload because the schema hash is unknown %llu.", header.schema_hash); + snprintf(buffer, sizeof(buffer), "Cannot apply the received payload because the schema hash is unknown %" PRIu64 ".", header.schema_hash); return cloudsync_set_error(data, buffer, DBRES_MISUSE); } } @@ -3253,6 +4010,9 @@ int cloudsync_payload_apply (cloudsync_context *data, const char *payload, int b if ((header.signature != CLOUDSYNC_PAYLOAD_SIGNATURE) || (header.ncols == 0)) { return cloudsync_set_error(data, "Error on cloudsync_payload_apply: invalid signature or column size", DBRES_MISUSE); } + if (header.version < CLOUDSYNC_PAYLOAD_VERSION_1 || header.version > CLOUDSYNC_PAYLOAD_VERSION_3) { + return cloudsync_set_error(data, "Error on cloudsync_payload_apply: unsupported payload version", DBRES_MISUSE); + } const char *buffer = payload + sizeof(cloudsync_payload_header); size_t buf_len = (size_t)blen - sizeof(cloudsync_payload_header); @@ -3280,6 +4040,34 @@ 
int cloudsync_payload_apply (cloudsync_context *data, const char *payload, int b buffer = (const char *)clone; buf_len = (size_t)header.expanded_size; } + + if (header.version == CLOUDSYNC_PAYLOAD_VERSION_3) { + int rc = DBRES_OK; + int applied_rows = 0; + if (header.ncols != CLOUDSYNC_CHANGES_NCOLS) { + if (clone) cloudsync_memory_free(clone); + return cloudsync_set_error(data, "Error on cloudsync_payload_apply: invalid v3 column count", DBRES_MISUSE); + } + for (uint32_t i = 0; i < header.nrows; ++i) { + size_t seek = 0; + cloudsync_payload_fragment_row row = {0}; + int res = pk_decode((char *)buffer, buf_len, header.ncols, &seek, -1, + cloudsync_payload_fragment_decode_callback, &row); + if (res == -1 || seek == 0 || seek > buf_len) { + rc = cloudsync_set_error(data, "Error on cloudsync_payload_apply: invalid v3 payload row", DBRES_MISUSE); + break; + } + int n = 0; + rc = cloudsync_payload_apply_fragment_row(data, &row, &n); + if (rc != DBRES_OK) break; + applied_rows += n; + buffer += seek; + buf_len -= seek; + } + if (clone) cloudsync_memory_free(clone); + if (pnrows) *pnrows = applied_rows; + return rc; + } // precompile the insert statement dbvm_t *vm = NULL; diff --git a/src/cloudsync.h b/src/cloudsync.h index 56c4d2b..6559881 100644 --- a/src/cloudsync.h +++ b/src/cloudsync.h @@ -18,7 +18,7 @@ extern "C" { #endif -#define CLOUDSYNC_VERSION "1.0.20" +#define CLOUDSYNC_VERSION "1.1.0" #define CLOUDSYNC_MAX_TABLENAME_LEN 512 #define CLOUDSYNC_VALUE_NOTSET -1 @@ -26,6 +26,9 @@ extern "C" { #define CLOUDSYNC_RLS_RESTRICTED_VALUE "__[RLS]__" #define CLOUDSYNC_DISABLE_ROWIDONLY_TABLES 1 #define CLOUDSYNC_DEFAULT_ALGO "cls" +#define CLOUDSYNC_PAYLOAD_CHUNK_DEFAULT_SIZE (5 * 1024 * 1024) +#define CLOUDSYNC_PAYLOAD_CHUNK_MIN_SIZE (256 * 1024) +#define CLOUDSYNC_PAYLOAD_CHUNK_SAFETY_MARGIN (16 * 1024) #define CLOUDSYNC_CHANGES_NCOLS 9 @@ -92,8 +95,35 @@ int cloudsync_payload_encode_step (cloudsync_payload_context *payload, clouds int cloudsync_payload_encode_final 
(cloudsync_payload_context *payload, cloudsync_context *data); char *cloudsync_payload_blob (cloudsync_payload_context *payload, int64_t *blob_size, int64_t *nrows); size_t cloudsync_payload_context_size (size_t *header_size); +uint64_t cloudsync_payload_context_nrows (cloudsync_payload_context *payload); +size_t cloudsync_payload_context_bused (cloudsync_payload_context *payload); int cloudsync_payload_get (cloudsync_context *data, char **blob, int *blob_size, int *db_version, int64_t *new_db_version); int cloudsync_payload_save (cloudsync_context *data, const char *payload_path, int *blob_size); // available only on Desktop OS (no WASM, no mobile) +int cloudsync_payload_max_chunk_size (cloudsync_context *data); +int cloudsync_payload_encode_fragment_step (cloudsync_payload_context *payload, cloudsync_context *data, + const char *tbl, int tbl_len, + const void *pk, int pk_len, + const char *col_name, int col_name_len, + const void *fragment, int fragment_len, + int64_t col_version, int64_t db_version, + const void *site_id, int site_id_len, + int64_t cl, int64_t seq, + uint64_t value_checksum, + int64_t total_size, + int part_index, int part_count); +int cloudsync_payload_fragment_target_size (cloudsync_context *data); +int cloudsync_payload_fragment_count (int64_t total_size, int target_size); +int cloudsync_payload_fragment_data_size (cloudsync_context *data, + const char *tbl, int tbl_len, + const void *pk, int pk_len, + const char *col_name, int col_name_len, + int64_t col_version, int64_t db_version, + const void *site_id, int site_id_len, + int64_t cl, int64_t seq, + int64_t total_size, + int part_index, int part_count); +uint64_t cloudsync_payload_encoded_value_checksum (dbvalue_t *value); +int cloudsync_payload_encoded_value_header (dbvalue_t *value, char *header, int header_cap, int64_t *payload_len); // CloudSync table context int cloudsync_refill_metatable (cloudsync_context *data, const char *table_name); diff --git a/src/dbutils.c b/src/dbutils.c 
index 4e565fe..f8ddd90 100644 --- a/src/dbutils.c +++ b/src/dbutils.c @@ -144,7 +144,13 @@ int dbutils_settings_get_value (cloudsync_context *data, const char *key, char * // INT case if (intvalue) { - *intvalue = database_column_int(vm, 0); + int type = database_column_type(vm, 0); + if (type == DBTYPE_TEXT) { + const char *value = database_column_text(vm, 0); + *intvalue = value ? strtoll(value, NULL, 0) : 0; + } else { + *intvalue = database_column_int(vm, 0); + } goto finalize_get_value; } diff --git a/src/dbutils.h b/src/dbutils.h index 472469a..f578cf0 100644 --- a/src/dbutils.h +++ b/src/dbutils.h @@ -26,6 +26,7 @@ #define CLOUDSYNC_KEY_DEBUG "debug" #define CLOUDSYNC_KEY_ALGO "algo" #define CLOUDSYNC_KEY_SKIP_SCHEMA_HASH_CHECK "skip_schema_hash_check" +#define CLOUDSYNC_KEY_PAYLOAD_MAX_CHUNK_SIZE "payload_max_chunk_size" // settings int dbutils_settings_init (cloudsync_context *data); diff --git a/src/network/network.c b/src/network/network.c index 652f96c..031151a 100644 --- a/src/network/network.c +++ b/src/network/network.c @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -1256,7 +1257,7 @@ static char *network_base64_encode(const unsigned char *src, size_t len) { } static char *network_apply_json_payload(const char *transport_key, const char *transport_value, - int db_version_min, int db_version_max) { + int64_t db_version_min, int64_t db_version_max) { if (!transport_key || !transport_value) return NULL; char *escaped_value = json_escape_string(transport_value); @@ -1270,13 +1271,121 @@ static char *network_apply_json_payload(const char *transport_key, const char *t } snprintf(json_payload, requested, - "{\"%s\":\"%s\", \"dbVersionMin\":%d, \"dbVersionMax\":%d}", + "{\"%s\":\"%s\", \"dbVersionMin\":%" PRId64 ", \"dbVersionMax\":%" PRId64 "}", transport_key, escaped_value, db_version_min, db_version_max); cloudsync_memory_free(escaped_value); return json_payload; } +static int network_send_payload_to_apply(sqlite3_context 
*context, network_data *netdata, + const void *blob, int blob_size, + int64_t db_version_min, int64_t db_version_max, + NETWORK_RESULT *res_out) { + memset(res_out, 0, sizeof(*res_out)); + if (!blob || blob_size <= 0) { + sqlite3_result_error(context, "cloudsync_network_send_changes: invalid empty payload chunk.", -1); + return SQLITE_ERROR; + } + + #ifdef CLOUDSYNC_NETWORK_TRACE + fprintf(stderr, + "[cloudsync-network] send_changes chunk_size=%d fast-lane:%s db_version_min=%" PRId64 " db_version_max=%" PRId64 "\n", + blob_size, + blob_size <= CLOUDSYNC_NETWORK_FAST_LANE_MAX_BLOB_SIZE ? "true" : "false", + db_version_min, + db_version_max); + #endif + + if (blob_size <= CLOUDSYNC_NETWORK_FAST_LANE_MAX_BLOB_SIZE) { + char *blob_base64 = network_base64_encode((const unsigned char *)blob, (size_t)blob_size); + if (!blob_base64) { + sqlite3_result_error(context, "cloudsync_network_send_changes: unable to encode payload chunk.", -1); + sqlite3_result_error_code(context, SQLITE_NOMEM); + return SQLITE_NOMEM; + } + + char *json_payload = network_apply_json_payload("blob", blob_base64, db_version_min, db_version_max); + cloudsync_memory_free(blob_base64); + if (!json_payload) { + sqlite3_result_error(context, "cloudsync_network_send_changes: unable to allocate apply request payload.", -1); + sqlite3_result_error_code(context, SQLITE_NOMEM); + return SQLITE_NOMEM; + } + + *res_out = network_receive_buffer(netdata, netdata->apply_endpoint, netdata->authentication, true, true, + json_payload, cloudsync_default_headers, ARRAY_LEN(cloudsync_default_headers)); + cloudsync_memory_free(json_payload); + return SQLITE_OK; + } + + NETWORK_RESULT upload_res = network_receive_buffer(netdata, netdata->upload_endpoint, netdata->authentication, true, false, + NULL, cloudsync_default_headers, ARRAY_LEN(cloudsync_default_headers)); + if (upload_res.code != CLOUDSYNC_NETWORK_BUFFER) { + network_result_to_sqlite_error(context, upload_res, "cloudsync_network_send_changes unable to receive 
upload URL"); + network_result_cleanup(&upload_res); + return SQLITE_ERROR; + } + + char *s3_url = json_extract_string(upload_res.buffer, upload_res.blen, "url"); + if (!s3_url) { + sqlite3_result_error(context, "cloudsync_network_send_changes: missing 'url' in upload response.", -1); + network_result_cleanup(&upload_res); + return SQLITE_ERROR; + } + + bool sent = network_send_buffer(netdata, s3_url, NULL, blob, blob_size); + if (sent == false) { + cloudsync_memory_free(s3_url); + network_result_to_sqlite_error(context, upload_res, "cloudsync_network_send_changes unable to upload payload chunk to remote host."); + network_result_cleanup(&upload_res); + return SQLITE_ERROR; + } + + char *json_payload = network_apply_json_payload("url", s3_url, db_version_min, db_version_max); + cloudsync_memory_free(s3_url); + if (!json_payload) { + sqlite3_result_error(context, "cloudsync_network_send_changes: unable to allocate apply request payload.", -1); + sqlite3_result_error_code(context, SQLITE_NOMEM); + network_result_cleanup(&upload_res); + return SQLITE_NOMEM; + } + + network_result_cleanup(&upload_res); + *res_out = network_receive_buffer(netdata, netdata->apply_endpoint, netdata->authentication, true, true, + json_payload, cloudsync_default_headers, ARRAY_LEN(cloudsync_default_headers)); + cloudsync_memory_free(json_payload); + return SQLITE_OK; +} + +static void network_sync_state_update_from_response(NETWORK_RESULT *res, + int64_t *last_optimistic_version, + int64_t *last_confirmed_version, + int *gaps_size, + char **apply_failure_json, + char **check_failure_json) { + if (!res || res->code != CLOUDSYNC_NETWORK_BUFFER || !res->buffer) return; + + int64_t parsed_version = json_extract_int(res->buffer, res->blen, "lastOptimisticVersion", -1); + if (parsed_version > *last_optimistic_version) *last_optimistic_version = parsed_version; + parsed_version = json_extract_int(res->buffer, res->blen, "lastConfirmedVersion", -1); + if (parsed_version > *last_confirmed_version) 
*last_confirmed_version = parsed_version; + int parsed_gaps_size = json_extract_array_size(res->buffer, res->blen, "gaps"); + if (parsed_gaps_size >= 0) *gaps_size = parsed_gaps_size; + + char *apply_failure = json_extract_failure_stage(res->buffer, res->blen, "apply"); + if (apply_failure) { + if (*apply_failure_json) cloudsync_memory_free(*apply_failure_json); + *apply_failure_json = apply_failure; + } + + char *check_failure = json_extract_failure_stage(res->buffer, res->blen, "check"); + if (check_failure) { + if (*check_failure_json) cloudsync_memory_free(*check_failure_json); + *check_failure_json = check_failure; + } +} + static const char *network_compute_status(int64_t last_optimistic, int64_t last_confirmed, int gaps_size, int64_t local_version) { if (last_optimistic < 0 || last_confirmed < 0) return "error"; @@ -1326,6 +1435,8 @@ void cloudsync_network_has_unsent_changes (sqlite3_context *context, int argc, s int cloudsync_network_send_changes_internal (sqlite3_context *context, int argc, sqlite3_value **argv, sync_result *out) { DEBUG_FUNCTION("cloudsync_network_send_changes"); + UNUSED_PARAMETER(argc); + UNUSED_PARAMETER(argv); // retrieve global context cloudsync_context *data = (cloudsync_context *)sqlite3_user_data(context); @@ -1333,125 +1444,100 @@ int cloudsync_network_send_changes_internal (sqlite3_context *context, int argc, network_data *netdata = (network_data *)cloudsync_auxdata(data); if (!netdata) {sqlite3_result_error(context, "Unable to retrieve CloudSync network context.", -1); return SQLITE_ERROR;} - // retrieve payload - char *blob = NULL; - int blob_size = 0, db_version = 0; - int64_t new_db_version = 0; - int rc = cloudsync_payload_get(data, &blob, &blob_size, &db_version, &new_db_version); + int64_t db_version = dbutils_settings_get_int64_value(data, CLOUDSYNC_KEY_SEND_DBVERSION); + if (db_version < 0) { + sqlite3_result_error(context, "Unable to retrieve db_version.", -1); + return SQLITE_ERROR; + } + + sqlite3 *db = 
sqlite3_context_db_handle(context); + sqlite3_stmt *stmt = NULL; + const char *chunk_sql = + "SELECT payload, payload_size, db_version_min, db_version_max, watermark_db_version " + "FROM cloudsync_payload_chunks WHERE since_db_version = ?"; + int rc = sqlite3_prepare_v2(db, chunk_sql, -1, &stmt, NULL); if (rc != SQLITE_OK) { - if (db_version < 0) sqlite3_result_error(context, "Unable to retrieve db_version.", -1); - else sqlite3_result_error(context, "Unable to retrieve changes in cloudsync_network_send_changes", -1); + sqlite3_result_error(context, sqlite3_errmsg(db), -1); + sqlite3_result_error_code(context, rc); return rc; } - - // Case 1: empty local db — no payload and no server state, skip network entirely - if ((blob == NULL || blob_size == 0) && db_version == 0) { - if (out) { - out->server_version = 0; - out->local_version = 0; - out->status = network_compute_status(0, 0, 0, 0); - } - return SQLITE_OK; - } + sqlite3_bind_int64(stmt, 1, db_version); - NETWORK_RESULT res; - if (blob != NULL && blob_size > 0) { - int db_version_min = db_version+1; - int db_version_max = (int)new_db_version; - if (db_version_min > db_version_max) db_version_min = db_version_max; - - #ifdef CLOUDSYNC_NETWORK_TRACE - fprintf(stderr, - "[cloudsync-network] send_changes blob_size=%d fast-lane:%s\n", - blob_size, - blob_size <= CLOUDSYNC_NETWORK_FAST_LANE_MAX_BLOB_SIZE ? 
"true" : "false"); - #endif - - if (blob_size <= CLOUDSYNC_NETWORK_FAST_LANE_MAX_BLOB_SIZE) { - char *blob_base64 = network_base64_encode((const unsigned char *)blob, (size_t)blob_size); - cloudsync_memory_free(blob); - if (!blob_base64) { - sqlite3_result_error(context, "cloudsync_network_send_changes: unable to encode BLOB changes.", -1); - sqlite3_result_error_code(context, SQLITE_NOMEM); - return SQLITE_NOMEM; - } + int64_t new_db_version = db_version; + int64_t last_optimistic_version = -1; + int64_t last_confirmed_version = -1; + int gaps_size = -1; + char *apply_failure_json = NULL; + char *check_failure_json = NULL; + bool sent_any = false; + + while ((rc = sqlite3_step(stmt)) == SQLITE_ROW) { + const void *blob = sqlite3_column_blob(stmt, 0); + int blob_size = sqlite3_column_bytes(stmt, 0); + int64_t payload_size = sqlite3_column_int64(stmt, 1); + int64_t db_version_min = sqlite3_column_int64(stmt, 2); + int64_t db_version_max = sqlite3_column_int64(stmt, 3); + int64_t watermark = sqlite3_column_int64(stmt, 4); + + if (!blob || blob_size <= 0 || payload_size != blob_size || payload_size > INT_MAX || + db_version_min <= 0 || db_version_max <= 0 || db_version_min > db_version_max) { + sqlite3_result_error(context, "cloudsync_network_send_changes: invalid payload chunk generated.", -1); + rc = SQLITE_ERROR; + goto cleanup; + } - char *json_payload = network_apply_json_payload("blob", blob_base64, db_version_min, db_version_max); - cloudsync_memory_free(blob_base64); - if (!json_payload) { - sqlite3_result_error(context, "cloudsync_network_send_changes: unable to allocate apply request payload.", -1); - sqlite3_result_error_code(context, SQLITE_NOMEM); - return SQLITE_NOMEM; - } + NETWORK_RESULT res = {0}; + rc = network_send_payload_to_apply(context, netdata, blob, blob_size, db_version_min, db_version_max, &res); + if (rc != SQLITE_OK) goto cleanup; - res = network_receive_buffer(netdata, netdata->apply_endpoint, netdata->authentication, true, true, 
json_payload, cloudsync_default_headers, ARRAY_LEN(cloudsync_default_headers)); - cloudsync_memory_free(json_payload); - } else { - // bulk lane: stage the payload through the upload endpoint and apply by URL - res = network_receive_buffer(netdata, netdata->upload_endpoint, netdata->authentication, true, false, NULL, cloudsync_default_headers, ARRAY_LEN(cloudsync_default_headers)); - if (res.code != CLOUDSYNC_NETWORK_BUFFER) { - cloudsync_memory_free(blob); - network_result_to_sqlite_error(context, res, "cloudsync_network_send_changes unable to receive upload URL"); - network_result_cleanup(&res); - return SQLITE_ERROR; - } - - char *s3_url = json_extract_string(res.buffer, res.blen, "url"); - if (!s3_url) { - cloudsync_memory_free(blob); - sqlite3_result_error(context, "cloudsync_network_send_changes: missing 'url' in upload response.", -1); - network_result_cleanup(&res); - return SQLITE_ERROR; - } - bool sent = network_send_buffer(netdata, s3_url, NULL, blob, blob_size); - cloudsync_memory_free(blob); - if (sent == false) { - cloudsync_memory_free(s3_url); - network_result_to_sqlite_error(context, res, "cloudsync_network_send_changes unable to upload BLOB changes to remote host."); - network_result_cleanup(&res); - return SQLITE_ERROR; - } + if (res.code == CLOUDSYNC_NETWORK_BUFFER && res.buffer) { + network_sync_state_update_from_response(&res, &last_optimistic_version, &last_confirmed_version, &gaps_size, + &apply_failure_json, &check_failure_json); + } else if (res.code != CLOUDSYNC_NETWORK_OK) { + network_result_to_sqlite_error(context, res, "cloudsync_network_send_changes unable to apply changes to remote host."); + network_result_cleanup(&res); + rc = SQLITE_ERROR; + goto cleanup; + } + network_result_cleanup(&res); - char *json_payload = network_apply_json_payload("url", s3_url, db_version_min, db_version_max); - cloudsync_memory_free(s3_url); - if (!json_payload) { - sqlite3_result_error(context, "cloudsync_network_send_changes: unable to allocate apply 
request payload.", -1); - sqlite3_result_error_code(context, SQLITE_NOMEM); - network_result_cleanup(&res); - return SQLITE_NOMEM; + sent_any = true; + if (watermark > new_db_version) new_db_version = watermark; + } + if (rc != SQLITE_DONE) { + sqlite3_result_error(context, sqlite3_errmsg(db), -1); + sqlite3_result_error_code(context, rc); + goto cleanup; + } + sqlite3_finalize(stmt); + stmt = NULL; + + if (!sent_any) { + // Empty local db with no server state: preserve the previous fast no-op path. + if (db_version == 0) { + if (out) { + out->server_version = 0; + out->local_version = 0; + out->status = network_compute_status(0, 0, 0, 0); } + rc = SQLITE_OK; + goto cleanup; + } - // free res + NETWORK_RESULT res = network_receive_buffer(netdata, netdata->status_endpoint, netdata->authentication, true, false, + NULL, cloudsync_default_headers, ARRAY_LEN(cloudsync_default_headers)); + if (res.code == CLOUDSYNC_NETWORK_BUFFER && res.buffer) { + network_sync_state_update_from_response(&res, &last_optimistic_version, &last_confirmed_version, &gaps_size, + &apply_failure_json, &check_failure_json); + } else if (res.code != CLOUDSYNC_NETWORK_OK) { + network_result_to_sqlite_error(context, res, "cloudsync_network_send_changes unable to apply changes to remote host."); network_result_cleanup(&res); - - // notify remote host that we successfully uploaded changes - res = network_receive_buffer(netdata, netdata->apply_endpoint, netdata->authentication, true, true, json_payload, cloudsync_default_headers, ARRAY_LEN(cloudsync_default_headers)); - cloudsync_memory_free(json_payload); + rc = SQLITE_ERROR; + goto cleanup; } - } else { - // there is no data to send, just check the status to update the db_version value in settings and to reply the status - new_db_version = db_version; - res = network_receive_buffer(netdata, netdata->status_endpoint, netdata->authentication, true, false, NULL, cloudsync_default_headers, ARRAY_LEN(cloudsync_default_headers)); - } - - int64_t 
last_optimistic_version = -1; - int64_t last_confirmed_version = -1; - int gaps_size = -1; - char *apply_failure_json = NULL; - char *check_failure_json = NULL; - - if (res.code == CLOUDSYNC_NETWORK_BUFFER && res.buffer) { - last_optimistic_version = json_extract_int(res.buffer, res.blen, "lastOptimisticVersion", -1); - last_confirmed_version = json_extract_int(res.buffer, res.blen, "lastConfirmedVersion", -1); - gaps_size = json_extract_array_size(res.buffer, res.blen, "gaps"); - if (gaps_size < 0) gaps_size = 0; - apply_failure_json = json_extract_failure_stage(res.buffer, res.blen, "apply"); - check_failure_json = json_extract_failure_stage(res.buffer, res.blen, "check"); - } else if (res.code != CLOUDSYNC_NETWORK_OK) { - network_result_to_sqlite_error(context, res, "cloudsync_network_send_changes unable to apply changes to remote host."); network_result_cleanup(&res); - return SQLITE_ERROR; } + if (gaps_size < 0) gaps_size = 0; // update db_version in settings char buf[256]; @@ -1477,9 +1563,13 @@ int cloudsync_network_send_changes_internal (sqlite3_context *context, int argc, } if (apply_failure_json) cloudsync_memory_free(apply_failure_json); if (check_failure_json) cloudsync_memory_free(check_failure_json); - - network_result_cleanup(&res); return SQLITE_OK; + +cleanup: + if (stmt) sqlite3_finalize(stmt); + if (apply_failure_json) cloudsync_memory_free(apply_failure_json); + if (check_failure_json) cloudsync_memory_free(check_failure_json); + return rc == SQLITE_DONE ? 
SQLITE_OK : rc; } void cloudsync_network_send_changes (sqlite3_context *context, int argc, sqlite3_value **argv) { diff --git a/src/pk.c b/src/pk.c index 97a6639..dcc8ca6 100644 --- a/src/pk.c +++ b/src/pk.c @@ -423,6 +423,56 @@ size_t pk_encode_data (char *buffer, size_t bseek, char *data, size_t datalen) { memcpy(buffer + bseek, data, datalen); return bseek + datalen; } + +size_t pk_encode_raw_size (int type, int64_t len_or_value) { + switch (type) { + case DBTYPE_INTEGER: { + if (len_or_value == INT64_MIN) return 1; + if (len_or_value < 0) len_or_value = -len_or_value; + return 1 + pk_encode_nbytes_needed(len_or_value); + } + case DBTYPE_FLOAT: + return 1 + sizeof(uint64_t); + case DBTYPE_TEXT: + case DBTYPE_BLOB: { + if (len_or_value < 0) return SIZE_MAX; + size_t nbytes = pk_encode_nbytes_needed(len_or_value); + return 1 + nbytes + (size_t)len_or_value; + } + case DBTYPE_NULL: + return 1; + } + return SIZE_MAX; +} + +size_t pk_encode_raw_int (char *buffer, int64_t value) { + int type = DBTYPE_INTEGER; + size_t bseek = 0; + if (value == INT64_MIN) { + return pk_encode_u8(buffer, bseek, DATABASE_TYPE_MAX_NEGATIVE_INTEGER); + } + if (value < 0) { value = -value; type = DATABASE_TYPE_NEGATIVE_INTEGER; } + size_t nbytes = pk_encode_nbytes_needed(value); + uint8_t type_byte = (uint8_t)((nbytes << 3) | type); + bseek = pk_encode_u8(buffer, bseek, type_byte); + return pk_encode_uint64(buffer, bseek, (uint64_t)value, nbytes); +} + +size_t pk_encode_raw_text (char *buffer, const char *value, size_t len) { + size_t nbytes = pk_encode_nbytes_needed((int64_t)len); + uint8_t type_byte = (uint8_t)((nbytes << 3) | DBTYPE_TEXT); + size_t bseek = pk_encode_u8(buffer, 0, type_byte); + bseek = pk_encode_uint64(buffer, bseek, (uint64_t)len, nbytes); + return pk_encode_data(buffer, bseek, (char *)value, len); +} + +size_t pk_encode_raw_blob (char *buffer, const void *value, size_t len) { + size_t nbytes = pk_encode_nbytes_needed((int64_t)len); + uint8_t type_byte = 
(uint8_t)((nbytes << 3) | DBTYPE_BLOB); + size_t bseek = pk_encode_u8(buffer, 0, type_byte); + bseek = pk_encode_uint64(buffer, bseek, (uint64_t)len, nbytes); + return pk_encode_data(buffer, bseek, (char *)value, len); +} char *pk_encode (dbvalue_t **argv, int argc, char *b, bool is_prikey, size_t *bsize, int skip_idx) { size_t bseek = 0; diff --git a/src/pk.h b/src/pk.h index ea9a390..ab61b9f 100644 --- a/src/pk.h +++ b/src/pk.h @@ -26,5 +26,9 @@ int pk_decode_bind_callback (void *xdata, int index, int type, int64_t ival, int pk_decode_print_callback (void *xdata, int index, int type, int64_t ival, double dval, char *pval); size_t pk_encode_size (dbvalue_t **argv, int argc, int reserved, int skip_idx); uint64_t pk_checksum (const char *buffer, size_t blen); +size_t pk_encode_raw_size (int type, int64_t len_or_value); +size_t pk_encode_raw_int (char *buffer, int64_t value); +size_t pk_encode_raw_text (char *buffer, const char *value, size_t len); +size_t pk_encode_raw_blob (char *buffer, const void *value, size_t len); #endif diff --git a/src/postgresql/cloudsync.sql.in b/src/postgresql/cloudsync.sql.in index edfa4d3..89665d4 100644 --- a/src/postgresql/cloudsync.sql.in +++ b/src/postgresql/cloudsync.sql.in @@ -149,6 +149,23 @@ CREATE OR REPLACE AGGREGATE cloudsync_payload_encode(text, bytea, text, bytea, b FINALFUNC = cloudsync_payload_encode_finalfn ); +CREATE OR REPLACE FUNCTION cloudsync_payload_chunks( + since_db_version bigint DEFAULT NULL, + filter_site_id bytea DEFAULT NULL, + until_db_version bigint DEFAULT NULL +) +RETURNS TABLE ( + payload bytea, + chunk_index bigint, + payload_size bigint, + rows bigint, + db_version_min bigint, + db_version_max bigint, + watermark_db_version bigint +) +AS 'MODULE_PATHNAME', 'cloudsync_payload_chunks' +LANGUAGE C VOLATILE; + -- Payload decoding and application CREATE OR REPLACE FUNCTION cloudsync_payload_decode(payload bytea) RETURNS integer diff --git a/src/postgresql/cloudsync_postgresql.c 
b/src/postgresql/cloudsync_postgresql.c index 4d0ed6a..af0ce77 100644 --- a/src/postgresql/cloudsync_postgresql.c +++ b/src/postgresql/cloudsync_postgresql.c @@ -1002,6 +1002,343 @@ Datum cloudsync_payload_encode_finalfn (PG_FUNCTION_ARGS) { PG_RETURN_BYTEA_P(result); } +typedef struct { + Portal portal; + TupleDesc outdesc; + SPITupleTable *current_tuptable; + bool spi_connected; + bool has_current; + bool eof; + int64 chunk_index; + int64 watermark; + int max_size; + int frag_target; + + char *tbl; + bytea *pk; + char *col_name; + bytea *col_value; + bool col_value_owned; + int64 col_version; + int64 db_version; + bytea *site_id; + int64 cl; + int64 seq; + + bool frag_active; + int frag_part; + int frag_count; + int64 frag_offset; + int64 frag_total; + uint64 frag_checksum; +} PayloadChunksState; + +static void payload_chunks_free_current(PayloadChunksState *st) { + if (!st) return; + if (st->tbl) pfree(st->tbl); + if (st->pk) pfree(st->pk); + if (st->col_name) pfree(st->col_name); + if (st->col_value && st->col_value_owned) pfree(st->col_value); + if (st->site_id) pfree(st->site_id); + if (st->current_tuptable) SPI_freetuptable(st->current_tuptable); + st->tbl = NULL; + st->pk = NULL; + st->col_name = NULL; + st->col_value = NULL; + st->col_value_owned = false; + st->site_id = NULL; + st->current_tuptable = NULL; + st->has_current = false; +} + +static bool payload_chunks_fetch_current(PayloadChunksState *st) { + if (st->has_current) return true; + if (st->eof) return false; + SPI_cursor_fetch(st->portal, true, 1); + if (SPI_processed == 0) { + if (SPI_tuptable) { SPI_freetuptable(SPI_tuptable); SPI_tuptable = NULL; } + st->eof = true; + return false; + } + + st->current_tuptable = SPI_tuptable; + HeapTuple tup = SPI_tuptable->vals[0]; + TupleDesc td = SPI_tuptable->tupdesc; + bool isnull = false; + Datum d; + + d = SPI_getbinval(tup, td, 1, &isnull); + st->tbl = isnull ? 
pstrdup("") : text_to_cstring(DatumGetTextPP(d)); + d = SPI_getbinval(tup, td, 2, &isnull); + if (!isnull) { + bytea *b = DatumGetByteaPP(d); + size_t n = VARSIZE_ANY(b); + st->pk = (bytea *)palloc(n); + memcpy(st->pk, b, n); + } + d = SPI_getbinval(tup, td, 3, &isnull); + st->col_name = isnull ? pstrdup("") : text_to_cstring(DatumGetTextPP(d)); + d = SPI_getbinval(tup, td, 4, &isnull); + if (!isnull) { + bytea *b = DatumGetByteaPP(d); + st->col_value = b; + st->col_value_owned = ((Pointer) b != DatumGetPointer(d)); + } + d = SPI_getbinval(tup, td, 5, &isnull); st->col_version = isnull ? 0 : DatumGetInt64(d); + d = SPI_getbinval(tup, td, 6, &isnull); st->db_version = isnull ? 0 : DatumGetInt64(d); + d = SPI_getbinval(tup, td, 7, &isnull); + if (!isnull) { + bytea *b = DatumGetByteaPP(d); + size_t n = VARSIZE_ANY(b); + st->site_id = (bytea *)palloc(n); + memcpy(st->site_id, b, n); + } + d = SPI_getbinval(tup, td, 8, &isnull); st->cl = isnull ? 0 : DatumGetInt64(d); + d = SPI_getbinval(tup, td, 9, &isnull); st->seq = isnull ? 
0 : DatumGetInt64(d); + + SPI_tuptable = NULL; + st->has_current = true; + return true; +} + +static void payload_chunks_make_pgvalues(PayloadChunksState *st, pgvalue_t **vals, text **owned_texts) { + owned_texts[0] = cstring_to_text(st->tbl); + owned_texts[1] = cstring_to_text(st->col_name); + vals[0] = pgvalue_create(PointerGetDatum(owned_texts[0]), TEXTOID, -1, InvalidOid, false); + vals[1] = pgvalue_create(PointerGetDatum(st->pk), BYTEAOID, -1, InvalidOid, false); + vals[2] = pgvalue_create(PointerGetDatum(owned_texts[1]), TEXTOID, -1, InvalidOid, false); + vals[3] = pgvalue_create(PointerGetDatum(st->col_value), BYTEAOID, -1, InvalidOid, false); + vals[4] = pgvalue_create(Int64GetDatum(st->col_version), INT8OID, -1, InvalidOid, false); + vals[5] = pgvalue_create(Int64GetDatum(st->db_version), INT8OID, -1, InvalidOid, false); + vals[6] = pgvalue_create(PointerGetDatum(st->site_id), BYTEAOID, -1, InvalidOid, false); + vals[7] = pgvalue_create(Int64GetDatum(st->cl), INT8OID, -1, InvalidOid, false); + vals[8] = pgvalue_create(Int64GetDatum(st->seq), INT8OID, -1, InvalidOid, false); +} + +static void payload_chunks_free_pgvalues(pgvalue_t **vals, text **owned_texts) { + for (int i = 0; i < 9; ++i) if (vals[i]) pgvalue_free(vals[i]); + if (owned_texts[0]) pfree(owned_texts[0]); + if (owned_texts[1]) pfree(owned_texts[1]); +} + +static bytea *payload_chunks_emit_pg_fragment(PayloadChunksState *st, cloudsync_context *data, + int64 *rows, int64 *dbv_min, int64 *dbv_max) { + int64 remaining = st->frag_total - st->frag_offset; + int frag_len = remaining > st->frag_target ? 
st->frag_target : (int)remaining; + if (frag_len <= 0) ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), errmsg("invalid payload fragment size"))); + const char *src = VARDATA_ANY(st->col_value) + st->frag_offset; + + cloudsync_payload_context *payload = cloudsync_memory_zeroalloc((uint64_t)cloudsync_payload_context_size(NULL)); + if (!payload) ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory"))); + int rc = cloudsync_payload_encode_fragment_step(payload, data, + st->tbl, -1, + VARDATA_ANY(st->pk), VARSIZE_ANY_EXHDR(st->pk), + st->col_name, -1, + src, frag_len, + st->col_version, st->db_version, + VARDATA_ANY(st->site_id), VARSIZE_ANY_EXHDR(st->site_id), + st->cl, st->seq, + st->frag_checksum, st->frag_total, st->frag_part, st->frag_count); + if (rc != DBRES_OK) ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), errmsg("%s", cloudsync_errmsg(data)))); + rc = cloudsync_payload_encode_final(payload, data); + if (rc != DBRES_OK) ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), errmsg("%s", cloudsync_errmsg(data)))); + int64 blob_size = 0; + char *blob = cloudsync_payload_blob(payload, &blob_size, rows); + bytea *result = (bytea *)palloc(VARHDRSZ + blob_size); + SET_VARSIZE(result, VARHDRSZ + blob_size); + memcpy(VARDATA(result), blob, blob_size); + cloudsync_memory_free(blob); + cloudsync_memory_free(payload); + + *dbv_min = st->db_version; + *dbv_max = st->db_version; + st->frag_offset += frag_len; + st->frag_part++; + if (st->frag_part >= st->frag_count) { + st->frag_active = false; + payload_chunks_free_current(st); + } + return result; +} + +static bytea *payload_chunks_build_pg_next(PayloadChunksState *st, cloudsync_context *data, + int64 *rows, int64 *dbv_min, int64 *dbv_max) { + *rows = *dbv_min = *dbv_max = 0; + if (st->frag_active) return payload_chunks_emit_pg_fragment(st, data, rows, dbv_min, dbv_max); + if (!payload_chunks_fetch_current(st)) return NULL; + + size_t header_size = 0; + cloudsync_payload_context_size(&header_size); + 
cloudsync_payload_context *payload = cloudsync_memory_zeroalloc((uint64_t)cloudsync_payload_context_size(NULL)); + if (!payload) ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory"))); + + while (payload_chunks_fetch_current(st)) { + size_t row_size = 0; + { + pgvalue_t *vals[9] = {0}; + text *owned_texts[2] = {0}; + payload_chunks_make_pgvalues(st, vals, owned_texts); + row_size = pk_encode_size((dbvalue_t **)vals, 9, 0, 3); + payload_chunks_free_pgvalues(vals, owned_texts); + } + if (row_size == SIZE_MAX) ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("payload row too large"))); + + if ((int64)row_size + (int64)header_size + CLOUDSYNC_PAYLOAD_CHUNK_SAFETY_MARGIN > st->max_size) { + if (cloudsync_payload_context_nrows(payload) > 0) break; + st->frag_total = VARSIZE_ANY_EXHDR(st->col_value); + st->frag_offset = 0; + st->frag_part = 0; + st->frag_target = cloudsync_payload_fragment_data_size(data, + st->tbl, -1, + VARDATA_ANY(st->pk), VARSIZE_ANY_EXHDR(st->pk), + st->col_name, -1, + st->col_version, st->db_version, + VARDATA_ANY(st->site_id), VARSIZE_ANY_EXHDR(st->site_id), + st->cl, st->seq, + st->frag_total, 0, 1); + if (st->frag_target <= 0) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("payload fragment metadata exceeds max chunk size"))); + for (int i = 0; i < 8; ++i) { + int count = cloudsync_payload_fragment_count(st->frag_total, st->frag_target); + if (count <= 0) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("payload requires too many fragments"))); + int planned = cloudsync_payload_fragment_data_size(data, + st->tbl, -1, + VARDATA_ANY(st->pk), VARSIZE_ANY_EXHDR(st->pk), + st->col_name, -1, + st->col_version, st->db_version, + VARDATA_ANY(st->site_id), VARSIZE_ANY_EXHDR(st->site_id), + st->cl, st->seq, + st->frag_total, count - 1, count); + if (planned <= 0) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("payload fragment metadata exceeds max chunk size"))); + if (planned == 
st->frag_target) break; + st->frag_target = planned; + } + st->frag_count = cloudsync_payload_fragment_count(st->frag_total, st->frag_target); + if (st->frag_count <= 0) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("payload requires too many fragments"))); + st->frag_checksum = pk_checksum(VARDATA_ANY(st->col_value), (size_t)st->frag_total); + st->frag_active = true; + cloudsync_memory_free(payload); + return payload_chunks_emit_pg_fragment(st, data, rows, dbv_min, dbv_max); + } + + if (cloudsync_payload_context_nrows(payload) > 0 && cloudsync_payload_context_bused(payload) + row_size > (size_t)st->max_size) break; + + pgvalue_t *vals[9] = {0}; + text *owned_texts[2] = {0}; + payload_chunks_make_pgvalues(st, vals, owned_texts); + int rc = cloudsync_payload_encode_step(payload, data, 9, (dbvalue_t **)vals); + payload_chunks_free_pgvalues(vals, owned_texts); + if (rc != DBRES_OK) ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), errmsg("%s", cloudsync_errmsg(data)))); + + if (cloudsync_payload_context_nrows(payload) == 1) *dbv_min = st->db_version; + *dbv_max = st->db_version; + payload_chunks_free_current(st); + } + + if (cloudsync_payload_context_nrows(payload) == 0) { + cloudsync_memory_free(payload); + return NULL; + } + int rc = cloudsync_payload_encode_final(payload, data); + if (rc != DBRES_OK) ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), errmsg("%s", cloudsync_errmsg(data)))); + int64 blob_size = 0; + char *blob = cloudsync_payload_blob(payload, &blob_size, rows); + bytea *result = (bytea *)palloc(VARHDRSZ + blob_size); + SET_VARSIZE(result, VARHDRSZ + blob_size); + memcpy(VARDATA(result), blob, blob_size); + cloudsync_memory_free(blob); + cloudsync_memory_free(payload); + return result; +} + +PG_FUNCTION_INFO_V1(cloudsync_payload_chunks); +Datum cloudsync_payload_chunks(PG_FUNCTION_ARGS) { + FuncCallContext *funcctx; + cloudsync_context *data = get_cloudsync_context(); + + if (SRF_IS_FIRSTCALL()) { + funcctx = SRF_FIRSTCALL_INIT(); + 
MemoryContext oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + PayloadChunksState *st = palloc0(sizeof(*st)); + st->chunk_index = 0; + + if (SPI_connect() != SPI_OK_CONNECT) ereport(ERROR, (errmsg("SPI_connect failed"))); + st->spi_connected = true; + st->max_size = cloudsync_payload_max_chunk_size(data); + size_t header_size_tmp = 0; + cloudsync_payload_context_size(&header_size_tmp); + st->frag_target = st->max_size - (int)header_size_tmp - CLOUDSYNC_PAYLOAD_CHUNK_SAFETY_MARGIN; + if (st->frag_target < 1024) st->frag_target = 1024; + + int64 since = PG_ARGISNULL(0) ? dbutils_settings_get_int64_value(data, CLOUDSYNC_KEY_SEND_DBVERSION) : PG_GETARG_INT64(0); + bytea *site_id = PG_ARGISNULL(1) ? NULL : PG_GETARG_BYTEA_PP(1); + if (!site_id) { + site_id = (bytea *)palloc(VARHDRSZ + UUID_LEN); + SET_VARSIZE(site_id, VARHDRSZ + UUID_LEN); + memcpy(VARDATA(site_id), cloudsync_siteid(data), UUID_LEN); + } + + int64 until = PG_ARGISNULL(2) ? 0 : PG_GETARG_INT64(2); + if (until == 0) { + Oid mt[1] = {BYTEAOID}; + Datum mv[1] = {PointerGetDatum(site_id)}; + char mn[1] = {' '}; + int mrc = SPI_execute_with_args("SELECT COALESCE(MAX(db_version),0) FROM cloudsync_changes_select(0,$1)", 1, mt, mv, mn, true, 1); + if (mrc == SPI_OK_SELECT && SPI_processed > 0) { + bool isnull = false; + Datum d = SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull); + until = isnull ? 
0 : DatumGetInt64(d); + } + if (SPI_tuptable) { SPI_freetuptable(SPI_tuptable); SPI_tuptable = NULL; } + } + st->watermark = until; + + StringInfoData q; + initStringInfo(&q); + appendStringInfoString(&q, + "SELECT tbl, pk, col_name, col_value, col_version, db_version, site_id, cl, seq " + "FROM cloudsync_changes_select($1,$2) WHERE db_version <= $3 ORDER BY db_version, seq ASC"); + Oid argtypes[3] = {INT8OID, BYTEAOID, INT8OID}; + Datum values[3] = {Int64GetDatum(since), PointerGetDatum(site_id), Int64GetDatum(until)}; + char nulls[3] = {' ', ' ', ' '}; + st->portal = SPI_cursor_open_with_args(NULL, q.data, 3, argtypes, values, nulls, true, 0); + pfree(q.data); + if (!st->portal) ereport(ERROR, (errmsg("SPI_cursor_open failed"))); + + TupleDesc outdesc; + if (get_call_result_type(fcinfo, NULL, &outdesc) != TYPEFUNC_COMPOSITE) ereport(ERROR, (errmsg("return type must be composite"))); + st->outdesc = BlessTupleDesc(outdesc); + funcctx->user_fctx = st; + MemoryContextSwitchTo(oldcontext); + } + + funcctx = SRF_PERCALL_SETUP(); + PayloadChunksState *st = (PayloadChunksState *)funcctx->user_fctx; + + int64 rows = 0, dbv_min = 0, dbv_max = 0; + bytea *payload = payload_chunks_build_pg_next(st, data, &rows, &dbv_min, &dbv_max); + if (!payload) { + if (st->portal) SPI_cursor_close(st->portal); + st->portal = NULL; + if (st->spi_connected) SPI_finish(); + st->spi_connected = false; + payload_chunks_free_current(st); + MemoryContextSwitchTo(fcinfo->flinfo->fn_mcxt); + SRF_RETURN_DONE(funcctx); + } + + Datum outvals[7]; + bool outnulls[7] = {false,false,false,false,false,false,false}; + outvals[0] = PointerGetDatum(payload); + outvals[1] = Int64GetDatum(st->chunk_index++); + outvals[2] = Int64GetDatum(VARSIZE_ANY_EXHDR(payload)); + outvals[3] = Int64GetDatum(rows); + outvals[4] = Int64GetDatum(dbv_min); + outvals[5] = Int64GetDatum(dbv_max); + outvals[6] = Int64GetDatum(st->watermark); + HeapTuple outtup = heap_form_tuple(st->outdesc, outvals, outnulls); + 
SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(outtup)); +} + // Payload decode - Apply changes from payload PG_FUNCTION_INFO_V1(cloudsync_payload_decode); Datum cloudsync_payload_decode (PG_FUNCTION_ARGS) { @@ -1009,7 +1346,7 @@ Datum cloudsync_payload_decode (PG_FUNCTION_ARGS) { ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("payload cannot be NULL"))); } - bytea *payload_data = PG_GETARG_BYTEA_P(0); + bytea *payload_data = PG_GETARG_BYTEA_P_COPY(0); int blen = VARSIZE(payload_data) - VARHDRSZ; // Sanity check payload size @@ -1037,6 +1374,7 @@ Datum cloudsync_payload_decode (PG_FUNCTION_ARGS) { } PG_CATCH(); { + if (payload_data) pfree(payload_data); if (spi_connected) SPI_finish(); PG_RE_THROW(); } @@ -1044,8 +1382,10 @@ Datum cloudsync_payload_decode (PG_FUNCTION_ARGS) { if (spi_connected) SPI_finish(); if (rc != DBRES_OK) { + if (payload_data) pfree(payload_data); ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), errmsg("%s", cloudsync_errmsg(data)))); } + if (payload_data) pfree(payload_data); PG_RETURN_INT32(nrows); } @@ -2796,6 +3136,7 @@ Datum cloudsync_changes_select(PG_FUNCTION_ARGS) { PG_RE_THROW(); } PG_END_TRY(); + PG_RETURN_NULL(); } // Trigger INSERT diff --git a/src/postgresql/sql_postgresql.c b/src/postgresql/sql_postgresql.c index 44ea2c1..eaf2c35 100644 --- a/src/postgresql/sql_postgresql.c +++ b/src/postgresql/sql_postgresql.c @@ -105,6 +105,42 @@ const char * const SQL_CHANGES_INSERT_ROW = "INSERT INTO cloudsync_changes(tbl, pk, col_name, col_value, col_version, db_version, site_id, cl, seq) " "VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9);"; +const char * const SQL_PAYLOAD_FRAGMENTS_CREATE_TABLE = + "CREATE TABLE IF NOT EXISTS cloudsync_payload_fragments (" + "value_id TEXT NOT NULL, part_index BIGINT NOT NULL, part_count BIGINT NOT NULL, total_size BIGINT NOT NULL, " + "checksum TEXT NOT NULL, created_at BIGINT NOT NULL DEFAULT (EXTRACT(EPOCH FROM now())::bigint), " + "tbl TEXT NOT NULL, pk BYTEA NOT NULL, col_name TEXT NOT NULL, 
col_version BIGINT NOT NULL, db_version BIGINT NOT NULL, " + "site_id BYTEA NOT NULL, cl BIGINT NOT NULL, seq BIGINT NOT NULL, fragment BYTEA NOT NULL, " + "PRIMARY KEY(value_id, part_index));"; + +const char * const SQL_PAYLOAD_FRAGMENTS_UPSERT = + "INSERT INTO cloudsync_payload_fragments " + "(value_id, part_index, part_count, total_size, checksum, created_at, tbl, pk, col_name, col_version, db_version, site_id, cl, seq, fragment) " + "VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,$14,$15) " + "ON CONFLICT (value_id, part_index) DO UPDATE SET " + "part_count=EXCLUDED.part_count,total_size=EXCLUDED.total_size,checksum=EXCLUDED.checksum,created_at=EXCLUDED.created_at," + "tbl=EXCLUDED.tbl,pk=EXCLUDED.pk," + "col_name=EXCLUDED.col_name,col_version=EXCLUDED.col_version,db_version=EXCLUDED.db_version," + "site_id=EXCLUDED.site_id,cl=EXCLUDED.cl,seq=EXCLUDED.seq,fragment=EXCLUDED.fragment;"; + +const char * const SQL_PAYLOAD_FRAGMENTS_COUNT = + "SELECT COUNT(*), MIN(part_count), MAX(part_count), MIN(total_size), MAX(total_size), " + "MIN(checksum), MAX(checksum), MIN(part_index), MAX(part_index) " + "FROM cloudsync_payload_fragments WHERE value_id=$1;"; + +const char * const SQL_PAYLOAD_FRAGMENTS_SELECT = + "SELECT fragment, tbl, pk, col_name, col_version, db_version, site_id, cl, seq, checksum " + "FROM cloudsync_payload_fragments WHERE value_id=$1 ORDER BY part_index ASC;"; + +const char * const SQL_PAYLOAD_FRAGMENTS_DELETE = + "DELETE FROM cloudsync_payload_fragments WHERE value_id=$1;"; + +const char * const SQL_PAYLOAD_FRAGMENTS_CLEANUP_STALE = + "DELETE FROM cloudsync_payload_fragments " + "WHERE created_at < $1 AND value_id IN (" + "SELECT value_id FROM cloudsync_payload_fragments GROUP BY value_id " + "HAVING COUNT(*) < MAX(part_count));"; + // MARK: Additional SQL constants for PostgreSQL const char * const SQL_SITEID_SELECT_ROWID0 = diff --git a/src/sql.h b/src/sql.h index d9b9f0d..6837121 100644 --- a/src/sql.h +++ b/src/sql.h @@ -67,6 +67,12 @@ extern 
const char * const SQL_CLOUDSYNC_INSERT_MISSING_PKS_FROM_BASE_EXCEPT_SYNC
 extern const char * const SQL_CLOUDSYNC_SELECT_PKS_NOT_IN_SYNC_FOR_COL;
 extern const char * const SQL_CLOUDSYNC_SELECT_PKS_NOT_IN_SYNC_FOR_COL_FILTERED;
 extern const char * const SQL_CHANGES_INSERT_ROW;
+extern const char * const SQL_PAYLOAD_FRAGMENTS_CREATE_TABLE;
+extern const char * const SQL_PAYLOAD_FRAGMENTS_UPSERT;
+extern const char * const SQL_PAYLOAD_FRAGMENTS_COUNT;
+extern const char * const SQL_PAYLOAD_FRAGMENTS_SELECT;
+extern const char * const SQL_PAYLOAD_FRAGMENTS_DELETE;
+extern const char * const SQL_PAYLOAD_FRAGMENTS_CLEANUP_STALE;
 
 // BLOCKS (block-level LWW)
 extern const char * const SQL_BLOCKS_CREATE_TABLE;
diff --git a/src/sqlite/cloudsync_sqlite.c b/src/sqlite/cloudsync_sqlite.c
index bdff56b..15af3a9 100644
--- a/src/sqlite/cloudsync_sqlite.c
+++ b/src/sqlite/cloudsync_sqlite.c
@@ -12,6 +12,7 @@
 #include "../block.h"
 #include "../database.h"
 #include "../dbutils.h"
+#include <stddef.h>
 
 #ifndef CLOUDSYNC_OMIT_NETWORK
 #include "../network/network.h"
@@ -1076,6 +1077,394 @@ void dbsync_payload_decode (sqlite3_context *context, int argc, sqlite3_value **
     sqlite3_result_int(context, nrows);
 }
 
+/* Per-connection state of the cloudsync_payload_chunks virtual table. */
+typedef struct {
+    sqlite3_vtab base;
+    sqlite3 *db;
+    cloudsync_context *data;
+} cloudsync_payload_chunks_vtab;
+
+/* Scan cursor. Fields from `eof` onwards are zeroed by payload_chunks_filter on every
+ * (re)start; base, vtab and src precede it and are managed explicitly. */
+typedef struct {
+    sqlite3_vtab_cursor base;
+    cloudsync_payload_chunks_vtab *vtab;
+    sqlite3_stmt *src;              /* SELECT over cloudsync_changes feeding the chunks */
+    bool eof;
+    bool has_row;                   /* src is positioned on a row not yet consumed */
+    int chunk_index;                /* number of chunks built so far */
+    char *payload;                  /* current chunk, released with cloudsync_memory_free */
+    int64_t payload_size;
+    int64_t rows;
+    int64_t dbv_min;
+    int64_t dbv_max;
+    int64_t watermark;              /* upper db_version bound of the scan */
+    bool frag_active;               /* a large value is being emitted fragment by fragment */
+    int frag_part;
+    int frag_count;
+    int frag_target;
+    int64_t frag_offset;
+    int64_t frag_total;             /* value_header_len + value_data_len */
+    uint64_t frag_checksum;
+    char value_header[16];
+    int value_header_len;
+    const char *value_data;         /* borrowed from the current src row */
+    int64_t value_data_len;
+} cloudsync_payload_chunks_cursor;
+
+/* xConnect: declares 7 result columns plus 3 HIDDEN argument columns and allocates the
+ * vtab. aux is the pointer registered with sqlite3_create_module (the cloudsync_context). */
+static int payload_chunks_connect(sqlite3 *db, void *aux, int argc, const char *const *argv, sqlite3_vtab **vtab, char **err) {
+    UNUSED_PARAMETER(argc); UNUSED_PARAMETER(argv); UNUSED_PARAMETER(err);
+    int rc = sqlite3_declare_vtab(db,
+        "CREATE TABLE x(payload BLOB, chunk_index INTEGER, payload_size INTEGER, rows INTEGER, "
+        "db_version_min INTEGER, db_version_max INTEGER, watermark_db_version INTEGER, "
+        "since_db_version HIDDEN, site_id HIDDEN, until_db_version HIDDEN)");
+    if (rc != SQLITE_OK) return rc;
+    cloudsync_payload_chunks_vtab *p = sqlite3_malloc64(sizeof(*p));
+    if (!p) return SQLITE_NOMEM;
+    memset(p, 0, sizeof(*p));
+    p->db = db;
+    p->data = (cloudsync_context *)aux;
+    *vtab = (sqlite3_vtab *)p;
+    return SQLITE_OK;
+}
+
+/* xDisconnect: releases the vtab allocated by payload_chunks_connect. */
+static int payload_chunks_disconnect(sqlite3_vtab *vtab) {
+    sqlite3_free(vtab);
+    return SQLITE_OK;
+}
+
+/* xOpen: allocates a zeroed cursor bound to its vtab. */
+static int payload_chunks_open(sqlite3_vtab *vtab, sqlite3_vtab_cursor **cursor) {
+    cloudsync_payload_chunks_cursor *c = cloudsync_memory_zeroalloc(sizeof(*c));
+    if (!c) return SQLITE_NOMEM;
+    c->vtab = (cloudsync_payload_chunks_vtab *)vtab;
+    *cursor = (sqlite3_vtab_cursor *)c;
+    return SQLITE_OK;
+}
+
+/* xClose: finalizes the source statement and frees the current payload and the cursor. */
+static int payload_chunks_close(sqlite3_vtab_cursor *cursor) {
+    cloudsync_payload_chunks_cursor *c = (cloudsync_payload_chunks_cursor *)cursor;
+    if (c->src) sqlite3_finalize(c->src);
+    if (c->payload) cloudsync_memory_free(c->payload);
+    cloudsync_memory_free(c);
+    return SQLITE_OK;
+}
+
+/*
+ * xBestIndex: consumes equality constraints on the hidden argument columns
+ * since_db_version (7), site_id (8) and until_db_version (9). idxNum records
+ * which ones are present (bit 1, 2, 4). argvIndex is assigned in column order,
+ * not in aConstraint[] order, because payload_chunks_filter reads argv[] in
+ * that fixed order.
+ */
+static int payload_chunks_best_index(sqlite3_vtab *vtab, sqlite3_index_info *idxinfo) {
+    UNUSED_PARAMETER(vtab);
+    int slot[3] = {-1, -1, -1};
+    int unusable = 0;
+    for (int i = 0; i < idxinfo->nConstraint; ++i) {
+        const struct sqlite3_index_constraint *cn = &idxinfo->aConstraint[i];
+        if (cn->iColumn < 7 || cn->iColumn > 9 || cn->op != SQLITE_INDEX_CONSTRAINT_EQ) continue;
+        int k = cn->iColumn - 7;
+        if (!cn->usable) { unusable |= 1 << k; continue; }
+        if (slot[k] < 0) slot[k] = i;
+    }
+
+    int argv_index = 1;
+    int idxnum = 0;
+    for (int k = 0; k < 3; ++k) {
+        if (slot[k] < 0) continue;
+        idxinfo->aConstraintUsage[slot[k]].argvIndex = argv_index++;
+        idxinfo->aConstraintUsage[slot[k]].omit = 1;
+        idxnum |= 1 << k;
+    }
+    /* An argument that this plan cannot supply would be silently replaced by its default. */
+    if (unusable & ~idxnum) return SQLITE_CONSTRAINT;
+    idxinfo->idxNum = idxnum;
+    idxinfo->estimatedCost = 10.0;
+    idxinfo->estimatedRows = 10;
+    return SQLITE_OK;
+}
+
+/* Steps the source statement; has_row tells whether a row is current. SQLITE_DONE maps to SQLITE_OK. */
+static int payload_chunks_step_source(cloudsync_payload_chunks_cursor *c) {
+    int rc = sqlite3_step(c->src);
+    if (rc == SQLITE_ROW) { c->has_row = true; return SQLITE_OK; }
+    c->has_row = false;
+    return rc == SQLITE_DONE ? SQLITE_OK : rc;
+}
+
+/* Picks frag_target (bytes of encoded value per fragment) and frag_count for the current row.
+ * The size reported by cloudsync_payload_fragment_data_size is re-queried with the resulting
+ * part count for up to 8 rounds until it stops changing; SQLITE_TOOBIG if no positive size/count. */
+static int payload_chunks_plan_fragment(cloudsync_payload_chunks_cursor *c) {
+    cloudsync_context *data = c->vtab->data;
+    int target = cloudsync_payload_fragment_data_size(data,
+        (const char *)sqlite3_column_text(c->src, 0), sqlite3_column_bytes(c->src, 0),
+        sqlite3_column_blob(c->src, 1), sqlite3_column_bytes(c->src, 1),
+        (const char *)sqlite3_column_text(c->src, 2), sqlite3_column_bytes(c->src, 2),
+        sqlite3_column_int64(c->src, 4), sqlite3_column_int64(c->src, 5),
+        sqlite3_column_blob(c->src, 6), sqlite3_column_bytes(c->src, 6),
+        sqlite3_column_int64(c->src, 7), sqlite3_column_int64(c->src, 8),
+        c->frag_total, 0, 1);
+    if (target <= 0) return SQLITE_TOOBIG;
+
+    int count = 0;
+    for (int i = 0; i < 8; ++i) {
+        count = cloudsync_payload_fragment_count(c->frag_total, target);
+        if (count <= 0) return SQLITE_TOOBIG;
+        int planned = cloudsync_payload_fragment_data_size(data,
+            (const char *)sqlite3_column_text(c->src, 0), sqlite3_column_bytes(c->src, 0),
+            sqlite3_column_blob(c->src, 1), sqlite3_column_bytes(c->src, 1),
+            (const char *)sqlite3_column_text(c->src, 2), sqlite3_column_bytes(c->src, 2),
+            sqlite3_column_int64(c->src, 4), sqlite3_column_int64(c->src, 5),
+            sqlite3_column_blob(c->src, 6), sqlite3_column_bytes(c->src, 6),
+            sqlite3_column_int64(c->src, 7), sqlite3_column_int64(c->src, 8),
+            c->frag_total, count - 1, count);
+        if (planned <= 0) return SQLITE_TOOBIG;
+        if (planned == target) break;
+        target = planned;
+    }
+
+    c->frag_target = target;
+    c->frag_count = cloudsync_payload_fragment_count(c->frag_total, target);
+    if (c->frag_count <= 0) return SQLITE_TOOBIG;
+    return SQLITE_OK;
+}
+
+/* Builds the payload for fragment frag_part: copies up to frag_target bytes starting at
+ * frag_offset from the logical concatenation value_header + value_data, encodes them with the
+ * current source row's metadata and advances the fragment state; steps src after the last part. */
+static int payload_chunks_emit_fragment(cloudsync_payload_chunks_cursor *c) {
+    cloudsync_context *data = c->vtab->data;
+    if (c->payload) { cloudsync_memory_free(c->payload); c->payload = NULL; }
+    int64_t remaining = c->frag_total - c->frag_offset;
+    int frag_len = remaining > c->frag_target ? c->frag_target : (int)remaining;
+    if (frag_len <= 0) return SQLITE_CORRUPT;
+    char *frag = cloudsync_memory_alloc((uint64_t)frag_len);
+    if (!frag) return SQLITE_NOMEM;
+    int copied = 0;
+    int64_t off = c->frag_offset;
+    if (off < c->value_header_len) {
+        int n = c->value_header_len - (int)off;
+        if (n > frag_len) n = frag_len;
+        memcpy(frag, c->value_header + off, (size_t)n);
+        copied += n;
+        off += n;
+    }
+    if (copied < frag_len) {
+        int64_t data_off = off - c->value_header_len;
+        memcpy(frag + copied, c->value_data + data_off, (size_t)(frag_len - copied));
+    }
+
+    cloudsync_payload_context *payload = cloudsync_memory_zeroalloc((uint64_t)cloudsync_payload_context_size(NULL));
+    if (!payload) { cloudsync_memory_free(frag); return SQLITE_NOMEM; }
+    int rc = cloudsync_payload_encode_fragment_step(payload, data,
+        (const char *)sqlite3_column_text(c->src, 0), sqlite3_column_bytes(c->src, 0),
+        sqlite3_column_blob(c->src, 1), sqlite3_column_bytes(c->src, 1),
+        (const char *)sqlite3_column_text(c->src, 2), sqlite3_column_bytes(c->src, 2),
+        frag, frag_len,
+        sqlite3_column_int64(c->src, 4), sqlite3_column_int64(c->src, 5),
+        sqlite3_column_blob(c->src, 6), sqlite3_column_bytes(c->src, 6),
+        sqlite3_column_int64(c->src, 7), sqlite3_column_int64(c->src, 8),
+        c->frag_checksum, c->frag_total, c->frag_part, c->frag_count);
+    cloudsync_memory_free(frag);
+    if (rc != SQLITE_OK) { cloudsync_memory_free(payload); return rc; }
+    rc = cloudsync_payload_encode_final(payload, data);
+    if (rc != SQLITE_OK) { cloudsync_memory_free(payload); return rc; }
+    c->payload = cloudsync_payload_blob(payload, &c->payload_size, &c->rows);
+    cloudsync_memory_free(payload);
+    c->dbv_min = sqlite3_column_int64(c->src, 5);
+    c->dbv_max = c->dbv_min;
+    c->chunk_index++;
+    c->frag_offset += frag_len;
+    c->frag_part++;
+    if (c->frag_part >= c->frag_count) {
+        c->frag_active = false;
+        rc = payload_chunks_step_source(c);
+    }
+    return rc;
+}
+
+/* Produces the next output row. An active fragmentation is resumed first; otherwise source rows
+ * are packed into one payload until the next row would not fit cloudsync_payload_max_chunk_size().
+ * A row too large on its own is split into fragments when its col_value is TEXT or BLOB, else
+ * SQLITE_TOOBIG is returned. eof is set once the source has no more rows. */
+static int payload_chunks_build_next(cloudsync_payload_chunks_cursor *c) {
+    cloudsync_context *data = c->vtab->data;
+    int rc = SQLITE_OK;
+    if (c->payload) { cloudsync_memory_free(c->payload); c->payload = NULL; }
+    c->payload_size = c->rows = c->dbv_min = c->dbv_max = 0;
+    if (c->frag_active) return payload_chunks_emit_fragment(c);
+    if (!c->has_row) { c->eof = true; return SQLITE_OK; }
+
+    int max_size = cloudsync_payload_max_chunk_size(data);
+    size_t payload_header_size = 0;
+    cloudsync_payload_context_size(&payload_header_size);
+    cloudsync_payload_context *payload = cloudsync_memory_zeroalloc((uint64_t)cloudsync_payload_context_size(NULL));
+    if (!payload) return SQLITE_NOMEM;
+    while (c->has_row) {
+        sqlite3_value *rowv[9];
+        for (int i = 0; i < 9; ++i) rowv[i] = sqlite3_column_value(c->src, i);
+        size_t row_size = pk_encode_size((dbvalue_t **)rowv, 9, 0, -1);
+        if (row_size == SIZE_MAX) { cloudsync_memory_free(payload); return SQLITE_NOMEM; }
+
+        if ((int64_t)row_size + (int64_t)payload_header_size + CLOUDSYNC_PAYLOAD_CHUNK_SAFETY_MARGIN > max_size) {
+            if (cloudsync_payload_context_nrows(payload) > 0) break;
+            dbvalue_t *col_value = (dbvalue_t *)rowv[3];
+            int type = database_value_type(col_value);
+            if (type != DBTYPE_TEXT && type != DBTYPE_BLOB) { cloudsync_memory_free(payload); return SQLITE_TOOBIG; }
+            int64_t raw_len = 0;
+            int header_len = cloudsync_payload_encoded_value_header(col_value, c->value_header, sizeof(c->value_header), &raw_len);
+            if (header_len <= 0) { cloudsync_memory_free(payload); return SQLITE_ERROR; }
+            c->value_header_len = header_len;
+            c->value_data = (const char *)database_value_blob(col_value);
+            c->value_data_len = raw_len;
+            c->frag_total = header_len + raw_len;
+            c->frag_offset = 0;
+            c->frag_part = 0;
+            rc = payload_chunks_plan_fragment(c);
+            if (rc != SQLITE_OK) { cloudsync_memory_free(payload); return rc; }
+            c->frag_checksum = cloudsync_payload_encoded_value_checksum(col_value);
+            c->frag_active = true;
+            cloudsync_memory_free(payload);
+            return payload_chunks_emit_fragment(c);
+        }
+
+        if (cloudsync_payload_context_nrows(payload) > 0 && cloudsync_payload_context_bused(payload) + row_size > (size_t)max_size) break;
+        rc = cloudsync_payload_encode_step(payload, data, 9, (dbvalue_t **)rowv);
+        if (rc != SQLITE_OK) { cloudsync_memory_free(payload); return rc; }
+        int64_t dbv = sqlite3_column_int64(c->src, 5);
+        if (cloudsync_payload_context_nrows(payload) == 1) c->dbv_min = dbv;
+        c->dbv_max = dbv;
+        rc = payload_chunks_step_source(c);
+        if (rc != SQLITE_OK) { cloudsync_memory_free(payload); return rc; }
+    }
+
+    if (cloudsync_payload_context_nrows(payload) == 0) { cloudsync_memory_free(payload); c->eof = true; return SQLITE_OK; }
+    rc = cloudsync_payload_encode_final(payload, data);
+    if (rc != SQLITE_OK) { cloudsync_memory_free(payload); return rc; }
+    c->payload = cloudsync_payload_blob(payload, &c->payload_size, &c->rows);
+    cloudsync_memory_free(payload);
+    c->chunk_index++;
+    return SQLITE_OK;
+}
+
+/*
+ * xFilter: (re)starts a scan. Arguments not supplied default to the stored
+ * CLOUDSYNC_KEY_SEND_DBVERSION setting (since), cloudsync_siteid() (site_id) and
+ * the highest db_version recorded for that site (until, also used when 0 is passed).
+ */
+static int payload_chunks_filter(sqlite3_vtab_cursor *cursor, int idxnum, const char *idxstr, int argc, sqlite3_value **argv) {
+    UNUSED_PARAMETER(idxstr);
+    cloudsync_payload_chunks_cursor *c = (cloudsync_payload_chunks_cursor *)cursor;
+    cloudsync_context *data = c->vtab->data;
+    if (c->src) { sqlite3_finalize(c->src); c->src = NULL; }
+    if (c->payload) { cloudsync_memory_free(c->payload); c->payload = NULL; }
+    memset(&c->eof, 0, sizeof(*c) - offsetof(cloudsync_payload_chunks_cursor, eof));
+
+    /* xBestIndex promised one argv entry per idxnum bit; never read past argc. */
+    if (argc < ((idxnum & 1) != 0) + ((idxnum & 2) != 0) + ((idxnum & 4) != 0)) return SQLITE_ERROR;
+
+    int argi = 0;
+    int64_t since = dbutils_settings_get_int64_value(data, CLOUDSYNC_KEY_SEND_DBVERSION);
+    const void *site_id = cloudsync_siteid(data);
+    int site_id_len = UUID_LEN;
+    int64_t until = 0;
+    if (idxnum & 1) since = sqlite3_value_int64(argv[argi++]);
+    if (idxnum & 2) { site_id = sqlite3_value_blob(argv[argi]); site_id_len = sqlite3_value_bytes(argv[argi]); argi++; }
+    if (idxnum & 4) until = sqlite3_value_int64(argv[argi++]);
+    int rc = SQLITE_OK;
+    if (until == 0) {
+        sqlite3_stmt *mx = NULL;
+        rc = sqlite3_prepare_v2(c->vtab->db, "SELECT COALESCE(MAX(db_version),0) FROM cloudsync_changes WHERE site_id=?", -1, &mx, NULL);
+        if (rc != SQLITE_OK) return rc;
+        rc = sqlite3_bind_blob(mx, 1, site_id, site_id_len, SQLITE_TRANSIENT);
+        if (rc == SQLITE_OK) rc = sqlite3_step(mx);
+        if (rc == SQLITE_ROW) until = sqlite3_column_int64(mx, 0);
+        sqlite3_finalize(mx);
+        /* A failed watermark lookup must not degrade into an empty scan. */
+        if (rc != SQLITE_ROW && rc != SQLITE_DONE) return rc;
+    }
+    c->watermark = until;
+
+    const char *sql = "SELECT tbl, pk, col_name, col_value, col_version, db_version, site_id, cl, seq "
+        "FROM cloudsync_changes WHERE db_version>? AND site_id=? AND db_version<=? ORDER BY db_version, seq ASC";
+    rc = sqlite3_prepare_v2(c->vtab->db, sql, -1, &c->src, NULL);
+    if (rc != SQLITE_OK) return rc;
+    rc = sqlite3_bind_int64(c->src, 1, since);
+    if (rc == SQLITE_OK) rc = sqlite3_bind_blob(c->src, 2, site_id, site_id_len, SQLITE_TRANSIENT);
+    if (rc == SQLITE_OK) rc = sqlite3_bind_int64(c->src, 3, until);
+    if (rc != SQLITE_OK) return rc;
+    rc = payload_chunks_step_source(c);
+    if (rc != SQLITE_OK) return rc;
+    return payload_chunks_build_next(c);
+}
+
+/* xNext: builds the following chunk (or sets eof). */
+static int payload_chunks_next(sqlite3_vtab_cursor *cursor) {
+    return payload_chunks_build_next((cloudsync_payload_chunks_cursor *)cursor);
+}
+
+/* xEof: true once payload_chunks_build_next found nothing more to emit. */
+static int payload_chunks_eof(sqlite3_vtab_cursor *cursor) {
+    return ((cloudsync_payload_chunks_cursor *)cursor)->eof;
+}
+
+/* xColumn: chunk_index was already incremented for the current chunk, so column 1 reports chunk_index - 1; hidden columns yield NULL. */
+static int payload_chunks_column(sqlite3_vtab_cursor *cursor, sqlite3_context *ctx, int col) {
+    cloudsync_payload_chunks_cursor *c = (cloudsync_payload_chunks_cursor *)cursor;
+    switch (col) {
+        case 0: sqlite3_result_blob64(ctx, c->payload, (sqlite3_uint64)c->payload_size, SQLITE_TRANSIENT); break;
+        case 1: sqlite3_result_int(ctx, c->chunk_index - 1); break;
+        case 2: sqlite3_result_int64(ctx, c->payload_size); break;
+        case 3: sqlite3_result_int64(ctx, c->rows); break;
+        case 4: sqlite3_result_int64(ctx, c->dbv_min); break;
+        case 5: sqlite3_result_int64(ctx, c->dbv_max); break;
+        case 6: sqlite3_result_int64(ctx, c->watermark); break;
+        default: sqlite3_result_null(ctx); break;
+    }
+    return SQLITE_OK;
+}
+
+/* xRowid: the running chunk counter (one more than the chunk_index column) serves as rowid. */
+static int payload_chunks_rowid(sqlite3_vtab_cursor *cursor, sqlite3_int64 *rowid) {
+    *rowid = ((cloudsync_payload_chunks_cursor *)cursor)->chunk_index;
+    return SQLITE_OK;
+}
+
+/* Read-only module; xCreate is NULL, which SQLite treats as an eponymous-only virtual table. */
+static sqlite3_module cloudsync_payload_chunks_module = {
+    /* iVersion    */ 0,
+    /* xCreate     */ NULL,
+    /* xConnect    */ payload_chunks_connect,
+    /* xBestIndex  */ payload_chunks_best_index,
+    /* xDisconnect */ payload_chunks_disconnect,
+    /* xDestroy    */ NULL,
+    /* xOpen       */ payload_chunks_open,
+    /* xClose      */ payload_chunks_close,
+    /* xFilter     */ payload_chunks_filter,
+    /* xNext       */ payload_chunks_next,
+    /* xEof        */ payload_chunks_eof,
+    /* xColumn     */ payload_chunks_column,
+    /* xRowid      */ payload_chunks_rowid,
+    /* xUpdate     */ NULL,
+    /* xBegin      */ NULL,
+    /* xSync       */ NULL,
+    /* xCommit     */ NULL,
+    /* xRollback   */ NULL,
+    /* xFindMethod */ NULL,
+    /* xRename     */ NULL,
+    /* xSavepoint  */ NULL,
+    /* xRelease    */ NULL,
+    /* xRollbackTo */ NULL,
+    /* xShadowName */ NULL,
+    /* xIntegrity  */ NULL
+};
+
 #ifdef CLOUDSYNC_DESKTOP_OS
 void dbsync_payload_save (sqlite3_context *context, int argc, sqlite3_value **argv) {
     DEBUG_FUNCTION("dbsync_payload_save");
@@ -1452,6 +1841,9 @@ int dbsync_register_functions (sqlite3 *db, char **pzErrMsg) {
     if (rc != SQLITE_OK) return rc;
     rc = dbsync_register_function(db, "cloudsync_payload_apply", dbsync_payload_decode, -1, pzErrMsg, ctx, NULL);
     if (rc != SQLITE_OK) return rc;
+
+    rc = sqlite3_create_module(db, "cloudsync_payload_chunks", &cloudsync_payload_chunks_module, (void *)ctx);
+    if (rc != SQLITE_OK) return rc;
 
 #ifdef CLOUDSYNC_DESKTOP_OS
     rc = dbsync_register_function(db, "cloudsync_payload_save", dbsync_payload_save, 1, pzErrMsg, ctx, NULL);
diff --git a/src/sqlite/sql_sqlite.c
b/src/sqlite/sql_sqlite.c index 471ae9b..f01a307 100644 --- a/src/sqlite/sql_sqlite.c +++ b/src/sqlite/sql_sqlite.c @@ -280,6 +280,37 @@ const char * const SQL_CHANGES_INSERT_ROW = "INSERT INTO cloudsync_changes(tbl, pk, col_name, col_value, col_version, db_version, site_id, cl, seq) " "VALUES (?,?,?,?,?,?,?,?,?);"; +const char * const SQL_PAYLOAD_FRAGMENTS_CREATE_TABLE = + "CREATE TABLE IF NOT EXISTS cloudsync_payload_fragments (" + "value_id TEXT NOT NULL, part_index INTEGER NOT NULL, part_count INTEGER NOT NULL, total_size INTEGER NOT NULL, " + "checksum TEXT NOT NULL, created_at INTEGER NOT NULL DEFAULT (unixepoch()), " + "tbl TEXT NOT NULL, pk BLOB NOT NULL, col_name TEXT NOT NULL, col_version INTEGER NOT NULL, db_version INTEGER NOT NULL, " + "site_id BLOB NOT NULL, cl INTEGER NOT NULL, seq INTEGER NOT NULL, fragment BLOB NOT NULL, " + "PRIMARY KEY(value_id, part_index)) WITHOUT ROWID;"; + +const char * const SQL_PAYLOAD_FRAGMENTS_UPSERT = + "INSERT OR REPLACE INTO cloudsync_payload_fragments " + "(value_id, part_index, part_count, total_size, checksum, created_at, tbl, pk, col_name, col_version, db_version, site_id, cl, seq, fragment) " + "VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?);"; + +const char * const SQL_PAYLOAD_FRAGMENTS_COUNT = + "SELECT COUNT(*), MIN(part_count), MAX(part_count), MIN(total_size), MAX(total_size), " + "MIN(checksum), MAX(checksum), MIN(part_index), MAX(part_index) " + "FROM cloudsync_payload_fragments WHERE value_id=?;"; + +const char * const SQL_PAYLOAD_FRAGMENTS_SELECT = + "SELECT fragment, tbl, pk, col_name, col_version, db_version, site_id, cl, seq, checksum " + "FROM cloudsync_payload_fragments WHERE value_id=? ORDER BY part_index ASC;"; + +const char * const SQL_PAYLOAD_FRAGMENTS_DELETE = + "DELETE FROM cloudsync_payload_fragments WHERE value_id=?;"; + +const char * const SQL_PAYLOAD_FRAGMENTS_CLEANUP_STALE = + "DELETE FROM cloudsync_payload_fragments " + "WHERE created_at < ? 
AND value_id IN (" + "SELECT value_id FROM cloudsync_payload_fragments GROUP BY value_id " + "HAVING COUNT(*) < MAX(part_count));"; + // MARK: Blocks (block-level LWW) const char * const SQL_BLOCKS_CREATE_TABLE = diff --git a/test/postgresql/39_payload_chunks.sql b/test/postgresql/39_payload_chunks.sql new file mode 100644 index 0000000..684cac6 --- /dev/null +++ b/test/postgresql/39_payload_chunks.sql @@ -0,0 +1,201 @@ +-- Payload chunks and transparent large-value fragmentation + +\set testid '39-chunks' +\ir helper_test_init.sql + +\connect postgres +\ir helper_psql_conn_setup.sql +DROP DATABASE IF EXISTS cloudsync_test_39_chunks_src; +DROP DATABASE IF EXISTS cloudsync_test_39_chunks_dst; +DROP DATABASE IF EXISTS cloudsync_test_39_chunks_legacy; +CREATE DATABASE cloudsync_test_39_chunks_src; +CREATE DATABASE cloudsync_test_39_chunks_dst; +CREATE DATABASE cloudsync_test_39_chunks_legacy; + +\connect cloudsync_test_39_chunks_src +\ir helper_psql_conn_setup.sql +CREATE EXTENSION IF NOT EXISTS cloudsync; +CREATE TABLE payload_chunk_test ( + id TEXT PRIMARY KEY, + note TEXT DEFAULT '', + data BYTEA DEFAULT '\x'::bytea +); +SELECT cloudsync_init('payload_chunk_test', 'CLS', 1) AS _init_src \gset +SELECT cloudsync_set('payload_max_chunk_size', '1'); + +INSERT INTO payload_chunk_test(id, note, data) +SELECT + 'big', + (SELECT string_agg(md5(i::text), '') FROM generate_series(1, 22500) AS g(i)), + decode((SELECT string_agg(md5((i * 17)::text), '') FROM generate_series(1, 23000) AS g(i)), 'hex'); + +INSERT INTO payload_chunk_test(id, note, data) +VALUES + ('same-a', 'same payload a', decode(repeat('ab', 360000), 'hex')), + ('same-b', 'same payload b', decode(repeat('ab', 360000), 'hex')); + +INSERT INTO payload_chunk_test(id, note, data) +SELECT + format('row-%s', lpad(i::text, 3, '0')), + format('small-%s-%s', i, repeat(md5(i::text), 24)), + decode(repeat(md5((i * 31)::text), 16), 'hex') +FROM generate_series(1, 260) AS g(i); + +SELECT + count(*) AS chunk_count, + 
count(*) FILTER (WHERE get_byte(payload, 4) = 3) AS v3_chunk_count, + bool_and(octet_length(payload) <= 262144) AS chunks_within_limit, + max(octet_length(payload)) AS max_chunk_len, + sum(rows) AS chunk_rows +FROM cloudsync_payload_chunks() \gset + +\if :chunks_within_limit +\echo [PASS] (:testid) Generated chunks respect the 256KB technical minimum - max: :max_chunk_len +\else +\echo [FAIL] (:testid) Generated chunk exceeds 256KB - max: :max_chunk_len +SELECT (:fail::int + 1) AS fail \gset +\endif + +SELECT (:chunk_count::int >= 5 AND :v3_chunk_count::int >= 2) AS chunk_shape_ok \gset +\if :chunk_shape_ok +\echo [PASS] (:testid) Rowset and large-value fragmentation produced multiple chunks (:chunk_count total, :v3_chunk_count v3) +\else +\echo [FAIL] (:testid) Expected multiple chunks and v3 fragments, got :chunk_count total and :v3_chunk_count v3 +SELECT (:fail::int + 1) AS fail \gset +\endif + +SELECT count(*) AS explicit_arg_chunk_count +FROM cloudsync_payload_chunks(NULL, cloudsync_siteid(), NULL) \gset + +SELECT (:explicit_arg_chunk_count::int = :chunk_count::int) AS explicit_args_ok \gset +\if :explicit_args_ok +\echo [PASS] (:testid) Optional cloudsync_payload_chunks arguments work +\else +\echo [FAIL] (:testid) Optional cloudsync_payload_chunks arguments changed result count +SELECT (:fail::int + 1) AS fail \gset +\endif + +SELECT + md5(string_agg(id || ':' || note || ':' || encode(data, 'hex'), '|' ORDER BY id)) AS src_hash, + count(*) AS src_count +FROM payload_chunk_test \gset + +SELECT string_agg(encode(payload, 'hex'), ',' ORDER BY chunk_index) AS chunks_hex +FROM cloudsync_payload_chunks() \gset + +SELECT + encode(cloudsync_payload_encode(tbl, pk, col_name, col_value, col_version, db_version, site_id, cl, seq), 'hex') AS legacy_payload_hex, + octet_length(cloudsync_payload_encode(tbl, pk, col_name, col_value, col_version, db_version, site_id, cl, seq)) AS legacy_payload_len +FROM cloudsync_changes +WHERE site_id = cloudsync_siteid() \gset + +SELECT 
(:legacy_payload_len::int > 262144) AS legacy_payload_large_ok \gset +\if :legacy_payload_large_ok +\echo [PASS] (:testid) Legacy monolithic payload is larger than local chunk setting (:legacy_payload_len bytes) +\else +\echo [FAIL] (:testid) Legacy monolithic payload was expected to exceed the chunk setting +SELECT (:fail::int + 1) AS fail \gset +\endif + +\connect cloudsync_test_39_chunks_dst +\ir helper_psql_conn_setup.sql +CREATE EXTENSION IF NOT EXISTS cloudsync; +CREATE TABLE payload_chunk_test ( + id TEXT PRIMARY KEY, + note TEXT DEFAULT '', + data BYTEA DEFAULT '\x'::bytea +); +SELECT cloudsync_init('payload_chunk_test', 'CLS', 1) AS _init_dst \gset +SELECT cloudsync_set('payload_max_chunk_size', '262144'); + +CREATE TEMP TABLE chunk_transport(ord INT, payload BYTEA); +INSERT INTO chunk_transport(ord, payload) +SELECT ord::int, decode(chunk_hex, 'hex') +FROM unnest(string_to_array(:'chunks_hex', ',')) WITH ORDINALITY AS t(chunk_hex, ord); + +SELECT coalesce(sum(cloudsync_payload_apply(payload)), 0) AS chunk_apply_rows +FROM (SELECT payload FROM chunk_transport ORDER BY ord DESC) AS ordered_chunks \gset + +SELECT + md5(string_agg(id || ':' || note || ':' || encode(data, 'hex'), '|' ORDER BY id)) AS dst_hash, + count(*) AS dst_count +FROM payload_chunk_test \gset + +SELECT (:'dst_hash' = :'src_hash' AND :dst_count::int = :src_count::int) AS chunk_apply_ok \gset +\if :chunk_apply_ok +\echo [PASS] (:testid) Chunked payloads apply correctly, including reverse-order v3 fragments and identical large values +\else +\echo [FAIL] (:testid) Chunked payload apply mismatch +SELECT (:fail::int + 1) AS fail \gset +\endif + +CREATE TABLE IF NOT EXISTS cloudsync_payload_fragments ( + value_id TEXT NOT NULL, + part_index BIGINT NOT NULL, + part_count BIGINT NOT NULL, + total_size BIGINT NOT NULL, + checksum TEXT NOT NULL, + created_at BIGINT NOT NULL DEFAULT (EXTRACT(EPOCH FROM now())::bigint), + tbl TEXT NOT NULL, + pk BYTEA NOT NULL, + col_name TEXT NOT NULL, + col_version 
BIGINT NOT NULL, + db_version BIGINT NOT NULL, + site_id BYTEA NOT NULL, + cl BIGINT NOT NULL, + seq BIGINT NOT NULL, + fragment BYTEA NOT NULL, + PRIMARY KEY(value_id, part_index) +); +INSERT INTO cloudsync_payload_fragments +(value_id, part_index, part_count, total_size, checksum, created_at, tbl, pk, col_name, col_version, db_version, site_id, cl, seq, fragment) +VALUES ('stale-incomplete', 0, 2, 10, '0000000000000000', 0, 'payload_chunk_test', '\x01', 'data', 1, 1, decode(repeat('00', 16), 'hex'), 1, 1, '\x00'); + +SELECT cloudsync_payload_apply(payload) AS stale_cleanup_apply +FROM chunk_transport +WHERE get_byte(payload, 4) = 3 +ORDER BY ord +LIMIT 1 \gset + +SELECT (COUNT(*) = 0) AS stale_cleanup_ok +FROM cloudsync_payload_fragments +WHERE value_id = 'stale-incomplete' \gset + +\if :stale_cleanup_ok +\echo [PASS] (:testid) Stale incomplete fragment cleanup works +\else +\echo [FAIL] (:testid) Stale incomplete fragment cleanup failed +SELECT (:fail::int + 1) AS fail \gset +\endif + +\connect cloudsync_test_39_chunks_legacy +\ir helper_psql_conn_setup.sql +CREATE EXTENSION IF NOT EXISTS cloudsync; +CREATE TABLE payload_chunk_test ( + id TEXT PRIMARY KEY, + note TEXT DEFAULT '', + data BYTEA DEFAULT '\x'::bytea +); +SELECT cloudsync_init('payload_chunk_test', 'CLS', 1) AS _init_legacy \gset +SELECT cloudsync_set('payload_max_chunk_size', '262144'); +SELECT cloudsync_payload_apply(decode(:'legacy_payload_hex', 'hex')) AS legacy_apply_rows \gset + +SELECT + md5(string_agg(id || ':' || note || ':' || encode(data, 'hex'), '|' ORDER BY id)) AS legacy_hash, + count(*) AS legacy_count +FROM payload_chunk_test \gset + +SELECT (:'legacy_hash' = :'src_hash' AND :legacy_count::int = :src_count::int) AS legacy_apply_ok \gset +\if :legacy_apply_ok +\echo [PASS] (:testid) Legacy monolithic payload applies even when larger than local chunk setting +\else +\echo [FAIL] (:testid) Legacy monolithic payload apply mismatch +SELECT (:fail::int + 1) AS fail \gset +\endif + +\ir 
helper_test_cleanup.sql +\if :should_cleanup +DROP DATABASE IF EXISTS cloudsync_test_39_chunks_src; +DROP DATABASE IF EXISTS cloudsync_test_39_chunks_dst; +DROP DATABASE IF EXISTS cloudsync_test_39_chunks_legacy; +\endif diff --git a/test/postgresql/full_test.sql b/test/postgresql/full_test.sql index 9ff000a..1d2e492 100644 --- a/test/postgresql/full_test.sql +++ b/test/postgresql/full_test.sql @@ -46,6 +46,7 @@ \ir 36_block_lww_round3.sql \ir 37_block_lww_round4.sql \ir 38_block_lww_round5.sql +\ir 39_payload_chunks.sql \ir 39_concurrent_write_apply.sql \ir 40_unsupported_algorithms.sql \ir 41_corrupted_payload.sql diff --git a/test/unit.c b/test/unit.c index 05e9c95..3bd74cd 100644 --- a/test/unit.c +++ b/test/unit.c @@ -11941,6 +11941,237 @@ bool do_test_corrupted_payload (int nclients, bool print_result, bool cleanup_da return result; } +typedef struct { + void *data; + int len; +} test_payload_chunk; + +static void test_payload_chunks_free(test_payload_chunk *chunks, int count) { + if (!chunks) return; + for (int i = 0; i < count; ++i) { + if (chunks[i].data) free(chunks[i].data); + } + free(chunks); +} + +static bool test_payload_chunks_tables_equal(sqlite3 *src, sqlite3 *dst) { + sqlite3_stmt *s1 = NULL; + sqlite3_stmt *s2 = NULL; + bool result = false; + + const char *sql = "SELECT id, note, data FROM payload_chunk_test ORDER BY id;"; + int rc1 = sqlite3_prepare_v2(src, sql, -1, &s1, NULL); + int rc2 = sqlite3_prepare_v2(dst, sql, -1, &s2, NULL); + if (rc1 != SQLITE_OK || rc2 != SQLITE_OK) goto finalize; + + while (1) { + rc1 = sqlite3_step(s1); + rc2 = sqlite3_step(s2); + if (rc1 != rc2) goto finalize; + if (rc1 == SQLITE_DONE) break; + if (rc1 != SQLITE_ROW) goto finalize; + + for (int i = 0; i < 3; ++i) { + int t1 = sqlite3_column_type(s1, i); + int t2 = sqlite3_column_type(s2, i); + int n1 = sqlite3_column_bytes(s1, i); + int n2 = sqlite3_column_bytes(s2, i); + if (t1 != t2 || n1 != n2) goto finalize; + const void *v1 = (t1 == SQLITE_BLOB) ? 
sqlite3_column_blob(s1, i) : sqlite3_column_text(s1, i); + const void *v2 = (t2 == SQLITE_BLOB) ? sqlite3_column_blob(s2, i) : sqlite3_column_text(s2, i); + if (n1 > 0 && (!v1 || !v2 || memcmp(v1, v2, n1) != 0)) goto finalize; + } + } + + result = true; + +finalize: + if (s1) sqlite3_finalize(s1); + if (s2) sqlite3_finalize(s2); + return result; +} + +bool do_test_payload_chunks_large_values (bool print_result, bool cleanup_databases) { + sqlite3 *db[3] = {NULL, NULL, NULL}; + sqlite3_stmt *stmt = NULL; + sqlite3_stmt *apply = NULL; + test_payload_chunk *chunks = NULL; + int chunk_count = 0; + int chunk_cap = 0; + int v3_count = 0; + int first_v3_chunk = -1; + bool result = false; + int rc = SQLITE_OK; + const int max_chunk_size = CLOUDSYNC_PAYLOAD_CHUNK_MIN_SIZE; + + time_t timestamp = time(NULL); + int saved_counter = test_counter++; + + for (int i = 0; i < 3; ++i) { + db[i] = do_create_database_file(i, timestamp, saved_counter); + if (!db[i]) goto finalize; + + rc = sqlite3_exec(db[i], + "CREATE TABLE payload_chunk_test (" + "id TEXT PRIMARY KEY, " + "note TEXT DEFAULT '', " + "data BLOB DEFAULT x'');" + "SELECT cloudsync_init('payload_chunk_test');", + NULL, NULL, NULL); + if (rc != SQLITE_OK) goto finalize; + } + + rc = sqlite3_exec(db[0], + "SELECT cloudsync_set('payload_max_chunk_size', '262144');" + "INSERT INTO payload_chunk_test(id, note, data) " + "VALUES ('big', lower(hex(randomblob(360000))), randomblob(720000));" + "INSERT INTO payload_chunk_test(id, note, data) " + "VALUES ('same-a', 'same payload a', zeroblob(720000));" + "INSERT INTO payload_chunk_test(id, note, data) " + "VALUES ('same-b', 'same payload b', zeroblob(720000));" + "WITH RECURSIVE c(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM c WHERE i < 260) " + "INSERT INTO payload_chunk_test(id, note, data) " + "SELECT printf('row-%03d', i), printf('small-%03d-%s', i, hex(randomblob(850))), randomblob(512) FROM c;", + NULL, NULL, NULL); + if (rc != SQLITE_OK) goto finalize; + + rc = 
sqlite3_prepare_v2(db[0], + "SELECT payload, payload_size, rows FROM cloudsync_payload_chunks() ORDER BY chunk_index;", + -1, &stmt, NULL); + if (rc != SQLITE_OK) goto finalize; + + while ((rc = sqlite3_step(stmt)) == SQLITE_ROW) { + int len = sqlite3_column_bytes(stmt, 0); + const void *payload = sqlite3_column_blob(stmt, 0); + sqlite3_int64 payload_size = sqlite3_column_int64(stmt, 1); + sqlite3_int64 rows = sqlite3_column_int64(stmt, 2); + if (!payload || len <= 0 || payload_size != len || len > max_chunk_size || rows <= 0) goto finalize; + if (len > 4 && ((const unsigned char *)payload)[4] == 3) { + if (first_v3_chunk < 0) first_v3_chunk = chunk_count; + ++v3_count; + } + + if (chunk_count == chunk_cap) { + int new_cap = chunk_cap ? chunk_cap * 2 : 16; + test_payload_chunk *new_chunks = realloc(chunks, sizeof(*chunks) * new_cap); + if (!new_chunks) goto finalize; + memset(new_chunks + chunk_cap, 0, sizeof(*chunks) * (new_cap - chunk_cap)); + chunks = new_chunks; + chunk_cap = new_cap; + } + + chunks[chunk_count].data = malloc(len); + if (!chunks[chunk_count].data) goto finalize; + memcpy(chunks[chunk_count].data, payload, len); + chunks[chunk_count].len = len; + ++chunk_count; + } + if (rc != SQLITE_DONE) goto finalize; + sqlite3_finalize(stmt); + stmt = NULL; + + if (chunk_count < 5 || v3_count < 2) goto finalize; + + rc = sqlite3_prepare_v2(db[1], "SELECT cloudsync_payload_apply(?);", -1, &apply, NULL); + if (rc != SQLITE_OK) goto finalize; + + // Apply in reverse order to verify that v3 value fragments can be staged + // and completed independently from transport ordering. 
+ for (int i = chunk_count - 1; i >= 0; --i) { + rc = sqlite3_bind_blob(apply, 1, chunks[i].data, chunks[i].len, SQLITE_STATIC); + if (rc != SQLITE_OK) goto finalize; + rc = sqlite3_step(apply); + if (rc != SQLITE_ROW) goto finalize; + sqlite3_reset(apply); + sqlite3_clear_bindings(apply); + } + sqlite3_finalize(apply); + apply = NULL; + + if (!test_payload_chunks_tables_equal(db[0], db[1])) goto finalize; + + if (first_v3_chunk < 0) goto finalize; + rc = sqlite3_exec(db[1], + "CREATE TABLE IF NOT EXISTS cloudsync_payload_fragments (" + "value_id TEXT NOT NULL, part_index INTEGER NOT NULL, part_count INTEGER NOT NULL, total_size INTEGER NOT NULL, " + "checksum TEXT NOT NULL, created_at INTEGER NOT NULL DEFAULT (unixepoch()), " + "tbl TEXT NOT NULL, pk BLOB NOT NULL, col_name TEXT NOT NULL, col_version INTEGER NOT NULL, db_version INTEGER NOT NULL, " + "site_id BLOB NOT NULL, cl INTEGER NOT NULL, seq INTEGER NOT NULL, fragment BLOB NOT NULL, " + "PRIMARY KEY(value_id, part_index)) WITHOUT ROWID;" + "INSERT OR REPLACE INTO cloudsync_payload_fragments " + "(value_id, part_index, part_count, total_size, checksum, created_at, tbl, pk, col_name, col_version, db_version, site_id, cl, seq, fragment) " + "VALUES ('stale-incomplete', 0, 2, 10, '0000000000000000', 0, 'payload_chunk_test', x'01', 'data', 1, 1, zeroblob(16), 1, 1, x'00');", + NULL, NULL, NULL); + if (rc != SQLITE_OK) goto finalize; + + rc = sqlite3_prepare_v2(db[1], "SELECT cloudsync_payload_apply(?);", -1, &apply, NULL); + if (rc != SQLITE_OK) goto finalize; + rc = sqlite3_bind_blob(apply, 1, chunks[first_v3_chunk].data, chunks[first_v3_chunk].len, SQLITE_STATIC); + if (rc != SQLITE_OK) goto finalize; + rc = sqlite3_step(apply); + if (rc != SQLITE_ROW) goto finalize; + sqlite3_finalize(apply); + apply = NULL; + + rc = sqlite3_prepare_v2(db[1], "SELECT COUNT(*) FROM cloudsync_payload_fragments WHERE value_id='stale-incomplete';", -1, &stmt, NULL); + if (rc != SQLITE_OK) goto finalize; + rc = sqlite3_step(stmt); 
+ if (rc != SQLITE_ROW || sqlite3_column_int(stmt, 0) != 0) goto finalize; + sqlite3_finalize(stmt); + stmt = NULL; + + // The legacy aggregate API must still produce an applyable monolithic payload. + rc = sqlite3_prepare_v2(db[0], + "SELECT cloudsync_payload_encode(tbl, pk, col_name, col_value, col_version, db_version, site_id, cl, seq) " + "FROM cloudsync_changes WHERE site_id=cloudsync_siteid();", + -1, &stmt, NULL); + if (rc != SQLITE_OK) goto finalize; + rc = sqlite3_step(stmt); + if (rc != SQLITE_ROW) goto finalize; + rc = sqlite3_prepare_v2(db[2], "SELECT cloudsync_payload_apply(?);", -1, &apply, NULL); + if (rc != SQLITE_OK) goto finalize; + rc = sqlite3_bind_value(apply, 1, sqlite3_column_value(stmt, 0)); + if (rc != SQLITE_OK) goto finalize; + rc = sqlite3_step(apply); + if (rc != SQLITE_ROW) goto finalize; + sqlite3_finalize(apply); + apply = NULL; + sqlite3_finalize(stmt); + stmt = NULL; + + if (!test_payload_chunks_tables_equal(db[0], db[2])) goto finalize; + + result = true; + +finalize: + if (!result && print_result) { + printf("do_test_payload_chunks_large_values error: %s / %s\n", + db[0] ? sqlite3_errmsg(db[0]) : "no source db", + db[1] ? 
sqlite3_errmsg(db[1]) : "no dest db"); + } + if (stmt) sqlite3_finalize(stmt); + if (apply) sqlite3_finalize(apply); + test_payload_chunks_free(chunks, chunk_count); + + for (int i = 0; i < 3; ++i) { + if (db[i]) close_db(db[i]); + } + + if (cleanup_databases) { + for (int i = 0; i < 3; ++i) { + char path[256], walpath[300], shmpath[300]; + do_build_database_path(path, i, timestamp, saved_counter); + snprintf(walpath, sizeof(walpath), "%s-wal", path); + snprintf(shmpath, sizeof(shmpath), "%s-shm", path); + file_delete_internal(path); + file_delete_internal(walpath); + file_delete_internal(shmpath); + } + } + + return result; +} + bool do_test_payload_idempotency (int nclients, bool print_result, bool cleanup_databases) { sqlite3 *db[2] = {NULL, NULL}; bool result = false; @@ -12388,6 +12619,7 @@ int main (int argc, const char * argv[]) { result += test_report("Payload Buffer Test (600KB):", do_test_payload_buffer(600 * 1024)); result += test_report("Payload Buffer Test (1MB):", do_test_payload_buffer(1024 * 1024)); result += test_report("Payload Buffer Test (10MB):", do_test_payload_buffer(10 * 1024 * 1024)); + result += test_report("Payload Chunks Large Values:", do_test_payload_chunks_large_values(print_result, cleanup_databases)); // close local database close_db(db); From 92a048c570d3e879e90d1b26a3eb6ca5b3d0d2f2 Mon Sep 17 00:00:00 2001 From: Andrea Donetti Date: Fri, 29 May 2026 19:36:42 -0600 Subject: [PATCH 2/2] feat(payload): site-exclusion in chunk generation + UUID conversion helpers - cloudsync_payload_chunks: add exclude_filter_site_id flag (SQLite hidden column / PG 4th arg) to stream changes from all sites except filter_site_id, as the /check download path needs; setting it without a site_id is an error - add cloudsync_uuid_text()/cloudsync_uuid_blob() scalar functions on SQLite and PostgreSQL to convert site_id between its 16-byte binary form and the canonical UUID string (tolerant of dashed/undashed input), so string-based callers can pass a site_id 
to cloudsync_payload_chunks - sqlite vtab: rewrite best_index to assign argv in canonical column order, fixing a latent argument-ordering bug - perf: throttle the v3 fragment stale-group GC to at most once per 60s per connection (cloudsync_context.last_fragment_cleanup), removing an O(n^2) full-table scan that ran on every applied fragment - add PostgreSQL 1.0->1.1 migration for the new chunked-payload SQL surface - build: neutralize the ambient build env for curl's ./configure (CURL_CONFIG_ENV) so exported LDFLAGS/CPPFLAGS/LIBS don't break it - test: rename PG 39_payload_chunks.sql -> 52 (39 was duplicated); add multi-site exclude, UUID roundtrip and stale-GC-throttle coverage (SQLite unit + PG) - docs: API.md (new argument + two functions) and CHANGELOG Co-Authored-By: Claude Opus 4.8 --- API.md | 70 +++++++- CHANGELOG.md | 3 + Makefile | 8 +- src/cloudsync.c | 22 ++- src/postgresql/cloudsync.sql.in | 16 +- src/postgresql/cloudsync_postgresql.c | 62 ++++++- .../migrations/cloudsync--1.0--1.1.sql | 36 ++++ src/sqlite/cloudsync_sqlite.c | 108 ++++++++++-- src/utils.c | 31 +++- src/utils.h | 4 + ...yload_chunks.sql => 52_payload_chunks.sql} | 158 +++++++++++++----- test/postgresql/full_test.sql | 2 +- test/unit.c | 153 ++++++++++++++++- 13 files changed, 590 insertions(+), 83 deletions(-) create mode 100644 src/postgresql/migrations/cloudsync--1.0--1.1.sql rename test/postgresql/{39_payload_chunks.sql => 52_payload_chunks.sql} (54%) diff --git a/API.md b/API.md index ec513f5..3e1d593 100644 --- a/API.md +++ b/API.md @@ -22,12 +22,14 @@ This document provides a reference for the SQL functions provided by the `sqlite - [`cloudsync_siteid()`](#cloudsync_siteid) - [`cloudsync_db_version()`](#cloudsync_db_version) - [`cloudsync_uuid()`](#cloudsync_uuid) + - [`cloudsync_uuid_text()`](#cloudsync_uuid_textuuid-dash_format) + - [`cloudsync_uuid_blob()`](#cloudsync_uuid_blobuuid) - [Schema Alteration Functions](#schema-alteration-functions) - 
[`cloudsync_begin_alter()`](#cloudsync_begin_altertable_name) - [`cloudsync_commit_alter()`](#cloudsync_commit_altertable_name) - [Payload Functions](#payload-functions) - [`cloudsync_payload_encode()`](#cloudsync_payload_encodetbl-pk-col_name-col_value-col_version-db_version-site_id-cl-seq) - - [`cloudsync_payload_chunks()`](#cloudsync_payload_chunkssince_db_version-filter_site_id-until_db_version) + - [`cloudsync_payload_chunks()`](#cloudsync_payload_chunkssince_db_version-filter_site_id-until_db_version-exclude_filter_site_id) - [`cloudsync_payload_apply()`](#cloudsync_payload_applypayload) - [Network Functions](#network-functions) - [`cloudsync_network_init()`](#cloudsync_network_initmanageddatabaseid) @@ -53,7 +55,7 @@ The following payload setting is supported: | Key | Description | Default | Minimum | |---|---|---:|---:| -| `payload_max_chunk_size` | Maximum transport payload size generated by [`cloudsync_payload_chunks()`](#cloudsync_payload_chunkssince_db_version-filter_site_id-until_db_version). Values below the minimum are clamped. | `5242880` (5 MB) | `262144` (256 KB) | +| `payload_max_chunk_size` | Maximum transport payload size generated by [`cloudsync_payload_chunks()`](#cloudsync_payload_chunkssince_db_version-filter_site_id-until_db_version-exclude_filter_site_id). Values below the minimum are clamped. | `5242880` (5 MB) | `262144` (256 KB) | `payload_max_chunk_size` affects only chunk generation. [`cloudsync_payload_apply()`](#cloudsync_payload_applypayload) continues to accept legacy payloads, monolithic payloads, and v3 chunk-fragment payloads even when they are larger than the local setting. This preserves compatibility between peers using different settings. 
@@ -399,6 +401,45 @@ INSERT INTO products (id, name) VALUES (cloudsync_uuid(), 'New Product'); --- +### `cloudsync_uuid_text(uuid, [dash_format])` + +**Description:** Converts a 16-byte binary UUID (such as the `site_id` stored in `cloudsync_changes`, or the value returned by [`cloudsync_siteid()`](#cloudsync_siteid)) into its canonical string form. + +**Parameters:** + +- `uuid` (BLOB/BYTEA): The 16-byte UUID. Returns `NULL` if `uuid` is `NULL`; raises an error if it is not exactly 16 bytes. +- `dash_format` (BOOLEAN, optional, default `true`): When `true`, returns the canonical 36-character dashed form (`xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`); when `false`, returns the bare 32-character hex form. + +**Returns:** The UUID as a TEXT value (lowercase hex). + +**Example:** + +```sql +SELECT cloudsync_uuid_text(cloudsync_siteid()); -- 0190a1b2-c3d4-7e5f-8a9b-001122334455 +SELECT cloudsync_uuid_text(cloudsync_siteid(), false); -- 0190a1b2c3d47e5f8a9b001122334455 +``` + +--- + +### `cloudsync_uuid_blob(uuid)` + +**Description:** Converts a UUID string into its 16-byte binary form. This is the inverse of [`cloudsync_uuid_text()`](#cloudsync_uuid_textuuid-dash_format) and lets string-based callers (for example, an HTTP `/check` endpoint holding a stringified `site_id`) pass a `site_id` to [`cloudsync_payload_chunks()`](#cloudsync_payload_chunkssince_db_version-filter_site_id-until_db_version-exclude_filter_site_id). + +**Parameters:** + +- `uuid` (TEXT): A UUID string. Tolerant: accepts the canonical dashed form and the bare 32-hex form, case-insensitive. Returns `NULL` if `uuid` is `NULL`; raises an error on malformed input. + +**Returns:** The 16-byte UUID as a BLOB/BYTEA. 
+ +**Example:** + +```sql +SELECT cloudsync_uuid_blob('0190a1b2-c3d4-7e5f-8a9b-001122334455'); +SELECT cloudsync_uuid_blob('0190A1B2C3D47E5F8A9B001122334455'); +``` + +--- + ## Schema Alteration Functions ### `cloudsync_begin_alter(table_name)` @@ -451,7 +492,7 @@ SELECT cloudsync_commit_alter('my_table'); **Description:** Encodes rows from `cloudsync_changes` into a single monolithic payload. This is the legacy payload API and remains fully supported for backward compatibility. -Use this API when the expected payload size is modest or when you need to interoperate with callers that expect a single BLOB. For large rowsets or large individual BLOB/TEXT values, prefer [`cloudsync_payload_chunks()`](#cloudsync_payload_chunkssince_db_version-filter_site_id-until_db_version), which splits transport payloads according to `payload_max_chunk_size`. +Use this API when the expected payload size is modest or when you need to interoperate with callers that expect a single BLOB. For large rowsets or large individual BLOB/TEXT values, prefer [`cloudsync_payload_chunks()`](#cloudsync_payload_chunkssince_db_version-filter_site_id-until_db_version-exclude_filter_site_id), which splits transport payloads according to `payload_max_chunk_size`. **Parameters:** The function is an aggregate over the columns returned by `cloudsync_changes`: @@ -478,7 +519,7 @@ FROM cloudsync_changes; --- -### `cloudsync_payload_chunks([since_db_version], [filter_site_id], [until_db_version])` +### `cloudsync_payload_chunks([since_db_version], [filter_site_id], [until_db_version], [exclude_filter_site_id])` **Description:** Generates sync payloads as a stream of transport-sized chunks. It is the chunk-aware evolution of [`cloudsync_payload_encode()`](#cloudsync_payload_encodetbl-pk-col_name-col_value-col_version-db_version-site_id-cl-seq), designed for large rowsets and for single BLOB/TEXT values that are larger than the configured chunk size. 
@@ -497,8 +538,9 @@ When a single encoded column value does not fit in one chunk, CloudSync transpar **Parameters:** - `since_db_version` (INTEGER/BIGINT, optional): Start after this source database version. If omitted, CloudSync uses the stored send checkpoint. -- `filter_site_id` (BLOB, optional): Site ID whose changes should be encoded. If omitted, CloudSync uses the local site ID. +- `filter_site_id` (BLOB, optional): Site ID to filter on. With `exclude_filter_site_id` unset/`false` it selects changes **from** this site; with `exclude_filter_site_id` `true` it selects changes from every site **except** this one. If omitted (and not excluding), CloudSync uses the local site ID. - `until_db_version` (INTEGER/BIGINT, optional): Upper watermark to include. If omitted or `0`, CloudSync captures the current maximum source database version before streaming chunks. +- `exclude_filter_site_id` (BOOLEAN, optional, default `false`): When `true`, stream changes from all sites **except** `filter_site_id`. This is what the `/check` download path needs — a peer must not receive its own changes back. Setting it `true` without a `filter_site_id` is an error. The site_id stored in `cloudsync_changes` is the 16-byte binary UUID; string callers can convert with [`cloudsync_uuid_blob()`](#cloudsync_uuid_blobuuid). 
**Returns:** A rowset with one row per chunk: @@ -527,9 +569,17 @@ WHERE since_db_version = 100 AND site_id = cloudsync_siteid() AND until_db_version = 200 ORDER BY chunk_index; + +-- /check download: all changes EXCEPT the requesting peer's site +SELECT payload, chunk_index, watermark_db_version +FROM cloudsync_payload_chunks +WHERE since_db_version = 100 + AND site_id = cloudsync_uuid_blob('0190a1b2-c3d4-7e5f-8a9b-001122334455') + AND exclude_filter_site_id = 1 +ORDER BY chunk_index; ``` -**PostgreSQL usage:** `cloudsync_payload_chunks` is exposed as a set-returning function with three optional arguments: +**PostgreSQL usage:** `cloudsync_payload_chunks` is exposed as a set-returning function with optional arguments: ```sql -- Default: uses the stored send checkpoint and local site id @@ -539,6 +589,10 @@ FROM cloudsync_payload_chunks(); -- Explicit arguments SELECT * FROM cloudsync_payload_chunks(100, cloudsync_siteid(), 200); + +-- /check download: all changes EXCEPT the requesting peer's site +SELECT * +FROM cloudsync_payload_chunks(100, cloudsync_uuid_blob('0190a1b2-c3d4-7e5f-8a9b-001122334455'), NULL, true); ``` **Apply example:** @@ -558,7 +612,7 @@ On PostgreSQL, apply chunks as individual statements from the transport/client l - Legacy payloads generated by older SQLite Sync versions. - Monolithic payloads generated by [`cloudsync_payload_encode()`](#cloudsync_payload_encodetbl-pk-col_name-col_value-col_version-db_version-site_id-cl-seq). -- Chunk-fragment payloads generated by [`cloudsync_payload_chunks()`](#cloudsync_payload_chunkssince_db_version-filter_site_id-until_db_version). +- Chunk-fragment payloads generated by [`cloudsync_payload_chunks()`](#cloudsync_payload_chunkssince_db_version-filter_site_id-until_db_version-exclude_filter_site_id). When a v3 fragment payload is received, CloudSync stores the fragment in an internal table and returns after applying zero or more completed values. 
Once the final fragment for a value is received, the completed value is validated and applied. Duplicate fragment delivery is idempotent. @@ -667,7 +721,7 @@ This means: if you get JSON back, the server was reachable and the network proto **Description:** Sends all unsent local changes to the remote server. -The send path streams payloads through [`cloudsync_payload_chunks()`](#cloudsync_payload_chunkssince_db_version-filter_site_id-until_db_version), so `payload_max_chunk_size` also limits the payloads generated for network transport. Each generated chunk is uploaded/applied independently; the local send checkpoint is advanced only after the chunk stream completes successfully. +The send path streams payloads through [`cloudsync_payload_chunks()`](#cloudsync_payload_chunkssince_db_version-filter_site_id-until_db_version-exclude_filter_site_id), so `payload_max_chunk_size` also limits the payloads generated for network transport. Each generated chunk is uploaded/applied independently; the local send checkpoint is advanced only after the chunk stream completes successfully. Chunk transport is transparent to the CloudSync backend. Each chunk is sent as a normal `/apply` payload, either inline as a base64 `blob` or through the upload `url` path. There is no separate chunk flag: old payloads, monolithic payloads, and v3 fragment payloads are distinguished by the payload format itself. diff --git a/CHANGELOG.md b/CHANGELOG.md index 84e0b6b..896bffb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). - **Chunked payload generation** via `cloudsync_payload_chunks()`, available as a SQLite virtual table and as a PostgreSQL set-returning function. The API emits transport-sized payload chunks and transparently fragments oversized BLOB/TEXT values into v3 fragment payloads. - **`payload_max_chunk_size` global setting** for controlling generated chunk size. 
The default is 5 MB and values below the 256 KB technical minimum are clamped. +- **`exclude_filter_site_id` argument** for `cloudsync_payload_chunks()`. When set, the function streams changes from every site **except** `filter_site_id`, which is what the `/check` download path needs (a peer must not receive its own changes back). The default (omitted/`false`) preserves the existing single-site behavior. Passing the flag without a `filter_site_id` is an error. +- **`cloudsync_uuid_text()` / `cloudsync_uuid_blob()`** scalar functions on both SQLite and PostgreSQL, converting between the 16-byte binary `site_id` and its canonical UUID string. `cloudsync_uuid_text()` takes an optional `dash_format` argument (default `true`); `cloudsync_uuid_blob()` accepts dashed or undashed, case-insensitive input. These let string-based callers (e.g. the `/check` endpoint) pass a `site_id` to `cloudsync_payload_chunks()`. - **Payload chunking documentation** in `API.md` and `PERFORMANCE.md`, including the explicit memory note that chunking bounds transport payloads but the database must still materialize a completed single BLOB/TEXT value when it is applied. +- **PostgreSQL `1.0 -> 1.1` upgrade script** (`migrations/cloudsync--1.0--1.1.sql`) for the new chunked-payload SQL surface, so existing deployments can `ALTER EXTENSION cloudsync UPDATE`. ### Changed diff --git a/Makefile b/Makefile index 189bf0f..5b1bc86 100644 --- a/Makefile +++ b/Makefile @@ -86,6 +86,12 @@ COV_FILES = $(filter-out $(SRC_DIR)/lz4.c $(NETWORK_DIR)/network.c $(SQLITE_IMPL CURL_LIB = $(CURL_DIR)/$(PLATFORM)/libcurl.a TEST_TARGET = $(patsubst %.c,$(DIST_DIR)/%$(EXE), $(notdir $(TEST_SRC))) +# Build curl hermetically: neutralize the developer's ambient build env so +# curl's ./configure compile tests aren't broken by overrides leaking in +# (e.g. exported LDFLAGS/CPPFLAGS/LIBS pointing at Homebrew). Build flags for +# curl are supplied explicitly via CURL_CONFIG. 
+CURL_CONFIG_ENV = LDFLAGS= CPPFLAGS= LIBS= CFLAGS= + # Platform-specific settings ifeq ($(PLATFORM),windows) TARGET := $(DIST_DIR)/cloudsync.dll @@ -326,7 +332,7 @@ else unzip $(CURL_DIR)/src/curl.zip -d $(CURL_DIR)/src/. endif - cd $(CURL_SRC) && ./configure \ + cd $(CURL_SRC) && $(CURL_CONFIG_ENV) ./configure \ --without-libpsl \ --disable-alt-svc \ --disable-ares \ diff --git a/src/cloudsync.c b/src/cloudsync.c index 05546b6..1683e9e 100644 --- a/src/cloudsync.c +++ b/src/cloudsync.c @@ -61,6 +61,7 @@ #define CLOUDSYNC_PAYLOAD_MIN_VERSION_WITH_CHECKSUM CLOUDSYNC_PAYLOAD_VERSION_2 #define CLOUDSYNC_PAYLOAD_FRAGMENT_PREFIX "__cloudsync_frag_v1__:" #define CLOUDSYNC_PAYLOAD_FRAGMENT_STALE_SECONDS (24*60*60) +#define CLOUDSYNC_PAYLOAD_FRAGMENT_CLEANUP_MIN_INTERVAL (60) #ifndef MAX #define MAX(a, b) (((a)>(b))?(a):(b)) @@ -165,7 +166,11 @@ struct cloudsync_context { int64_t pending_db_version; // used to set an order inside each transaction int seq; - + + // wall-clock (time()) of the last stale v3-fragment GC; throttles the GC so + // it does not run a full table scan on every applied fragment (0 = never run) + int64_t last_fragment_cleanup; + // optional schema_name to be set in the cloudsync_table_context char *current_schema; @@ -3681,10 +3686,23 @@ static int cloudsync_payload_bind_param_callback (void *xdata, int index, int ty } static int cloudsync_payload_fragments_cleanup_stale (cloudsync_context *data) { + // Stale-fragment GC is pure maintenance (it removes incomplete fragment groups + // older than CLOUDSYNC_PAYLOAD_FRAGMENT_STALE_SECONDS), so it has no correctness + // deadline. It runs a full GROUP BY scan of the fragments table; calling it on + // every applied fragment would be O(n^2) for a heavily-fragmented value, since + // each fragment arrives as its own apply call. Throttle it to at most once per + // CLOUDSYNC_PAYLOAD_FRAGMENT_CLEANUP_MIN_INTERVAL per connection. 
+ int64_t now = (int64_t)time(NULL); + if (data->last_fragment_cleanup != 0 && + now - data->last_fragment_cleanup < CLOUDSYNC_PAYLOAD_FRAGMENT_CLEANUP_MIN_INTERVAL) { + return DBRES_OK; + } + data->last_fragment_cleanup = now; + dbvm_t *vm = NULL; int rc = databasevm_prepare(data, SQL_PAYLOAD_FRAGMENTS_CLEANUP_STALE, &vm, 0); if (rc != DBRES_OK) return rc; - int64_t cutoff = (int64_t)time(NULL) - CLOUDSYNC_PAYLOAD_FRAGMENT_STALE_SECONDS; + int64_t cutoff = now - CLOUDSYNC_PAYLOAD_FRAGMENT_STALE_SECONDS; rc = databasevm_bind_int(vm, 1, cutoff); if (rc == DBRES_OK) rc = databasevm_step(vm); databasevm_finalize(vm); diff --git a/src/postgresql/cloudsync.sql.in b/src/postgresql/cloudsync.sql.in index 89665d4..83871e2 100644 --- a/src/postgresql/cloudsync.sql.in +++ b/src/postgresql/cloudsync.sql.in @@ -152,7 +152,8 @@ CREATE OR REPLACE AGGREGATE cloudsync_payload_encode(text, bytea, text, bytea, b CREATE OR REPLACE FUNCTION cloudsync_payload_chunks( since_db_version bigint DEFAULT NULL, filter_site_id bytea DEFAULT NULL, - until_db_version bigint DEFAULT NULL + until_db_version bigint DEFAULT NULL, + exclude_filter_site_id boolean DEFAULT false ) RETURNS TABLE ( payload bytea, @@ -166,6 +167,19 @@ RETURNS TABLE ( AS 'MODULE_PATHNAME', 'cloudsync_payload_chunks' LANGUAGE C VOLATILE; +-- UUID binary <-> canonical string helpers (the changes table stores site_id as +-- the 16-byte binary UUID; these let string-based callers such as the /check +-- endpoint pass a site_id to cloudsync_payload_chunks). 
+CREATE OR REPLACE FUNCTION cloudsync_uuid_text(uuid bytea, dash_format boolean DEFAULT true) +RETURNS text +AS 'MODULE_PATHNAME', 'cloudsync_uuid_text' +LANGUAGE C IMMUTABLE; + +CREATE OR REPLACE FUNCTION cloudsync_uuid_blob(uuid text) +RETURNS bytea +AS 'MODULE_PATHNAME', 'cloudsync_uuid_blob' +LANGUAGE C IMMUTABLE; + -- Payload decoding and application CREATE OR REPLACE FUNCTION cloudsync_payload_decode(payload bytea) RETURNS integer diff --git a/src/postgresql/cloudsync_postgresql.c b/src/postgresql/cloudsync_postgresql.c index af0ce77..ae76dbd 100644 --- a/src/postgresql/cloudsync_postgresql.c +++ b/src/postgresql/cloudsync_postgresql.c @@ -194,10 +194,41 @@ Datum cloudsync_uuid (PG_FUNCTION_ARGS) { // Parse into PostgreSQL UUID type Datum uuid_datum = DirectFunctionCall1(uuid_in, CStringGetDatum(uuid_str)); - + PG_RETURN_DATUM(uuid_datum); } +// cloudsync_uuid_text(bytea, [dash_format]) - 16-byte UUID -> canonical string +PG_FUNCTION_INFO_V1(cloudsync_uuid_text); +Datum cloudsync_uuid_text (PG_FUNCTION_ARGS) { + if (PG_ARGISNULL(0)) PG_RETURN_NULL(); + bytea *b = PG_GETARG_BYTEA_PP(0); + if (VARSIZE_ANY_EXHDR(b) != UUID_LEN) { + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cloudsync_uuid_text: expected a 16-byte value"))); + } + bool dash_format = PG_ARGISNULL(1) ? 
true : PG_GETARG_BOOL(1); + char uuid_str[UUID_STR_MAXLEN]; + cloudsync_uuid_v7_stringify((uint8_t *)VARDATA_ANY(b), uuid_str, dash_format); + PG_RETURN_TEXT_P(cstring_to_text(uuid_str)); +} + +// cloudsync_uuid_blob(text) - UUID string -> 16-byte value (dashed/undashed) +PG_FUNCTION_INFO_V1(cloudsync_uuid_blob); +Datum cloudsync_uuid_blob (PG_FUNCTION_ARGS) { + if (PG_ARGISNULL(0)) PG_RETURN_NULL(); + text *t = PG_GETARG_TEXT_PP(0); + uint8_t uuid[UUID_LEN]; + if (cloudsync_uuid_v7_parse(VARDATA_ANY(t), (int)VARSIZE_ANY_EXHDR(t), uuid) != 0) { + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cloudsync_uuid_blob: malformed UUID string"))); + } + bytea *result = (bytea *)palloc(VARHDRSZ + UUID_LEN); + SET_VARSIZE(result, VARHDRSZ + UUID_LEN); + memcpy(VARDATA(result), uuid, UUID_LEN); + PG_RETURN_BYTEA_P(result); +} + // cloudsync_db_version() - Get current database version PG_FUNCTION_INFO_V1(cloudsync_db_version); Datum cloudsync_db_version (PG_FUNCTION_ARGS) { @@ -1271,7 +1302,16 @@ Datum cloudsync_payload_chunks(PG_FUNCTION_ARGS) { int64 since = PG_ARGISNULL(0) ? dbutils_settings_get_int64_value(data, CLOUDSYNC_KEY_SEND_DBVERSION) : PG_GETARG_INT64(0); bytea *site_id = PG_ARGISNULL(1) ? NULL : PG_GETARG_BYTEA_PP(1); - if (!site_id) { + bool exclude = PG_ARGISNULL(3) ? 
false : PG_GETARG_BOOL(3); + // Site filter resolution: + // exclude=true -> all sites except filter_site_id (CHECK path); site required + // filter given -> only that site + // default -> local site (send path, unchanged) + if (exclude && !site_id) { + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cloudsync_payload_chunks: exclude_filter_site_id requires a non-NULL filter_site_id"))); + } + if (!exclude && !site_id) { site_id = (bytea *)palloc(VARHDRSZ + UUID_LEN); SET_VARSIZE(site_id, VARHDRSZ + UUID_LEN); memcpy(VARDATA(site_id), cloudsync_siteid(data), UUID_LEN); @@ -1282,7 +1322,10 @@ Datum cloudsync_payload_chunks(PG_FUNCTION_ARGS) { Oid mt[1] = {BYTEAOID}; Datum mv[1] = {PointerGetDatum(site_id)}; char mn[1] = {' '}; - int mrc = SPI_execute_with_args("SELECT COALESCE(MAX(db_version),0) FROM cloudsync_changes_select(0,$1)", 1, mt, mv, mn, true, 1); + const char *mxq = exclude + ? "SELECT COALESCE(MAX(db_version),0) FROM cloudsync_changes_select(0,NULL) WHERE site_id <> $1" + : "SELECT COALESCE(MAX(db_version),0) FROM cloudsync_changes_select(0,$1)"; + int mrc = SPI_execute_with_args(mxq, 1, mt, mv, mn, true, 1); if (mrc == SPI_OK_SELECT && SPI_processed > 0) { bool isnull = false; Datum d = SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull); @@ -1294,9 +1337,16 @@ Datum cloudsync_payload_chunks(PG_FUNCTION_ARGS) { StringInfoData q; initStringInfo(&q); - appendStringInfoString(&q, - "SELECT tbl, pk, col_name, col_value, col_version, db_version, site_id, cl, seq " - "FROM cloudsync_changes_select($1,$2) WHERE db_version <= $3 ORDER BY db_version, seq ASC"); + if (exclude) { + // $1=since (into changes_select), $2=site to exclude, $3=until watermark + appendStringInfoString(&q, + "SELECT tbl, pk, col_name, col_value, col_version, db_version, site_id, cl, seq " + "FROM cloudsync_changes_select($1,NULL) WHERE site_id <> $2 AND db_version <= $3 ORDER BY db_version, seq ASC"); + } else { + appendStringInfoString(&q, + 
"SELECT tbl, pk, col_name, col_value, col_version, db_version, site_id, cl, seq " + "FROM cloudsync_changes_select($1,$2) WHERE db_version <= $3 ORDER BY db_version, seq ASC"); + } Oid argtypes[3] = {INT8OID, BYTEAOID, INT8OID}; Datum values[3] = {Int64GetDatum(since), PointerGetDatum(site_id), Int64GetDatum(until)}; char nulls[3] = {' ', ' ', ' '}; diff --git a/src/postgresql/migrations/cloudsync--1.0--1.1.sql b/src/postgresql/migrations/cloudsync--1.0--1.1.sql new file mode 100644 index 0000000..6c646a6 --- /dev/null +++ b/src/postgresql/migrations/cloudsync--1.0--1.1.sql @@ -0,0 +1,36 @@ +-- CloudSync PostgreSQL extension upgrade: 1.0 -> 1.1 +-- +-- Adds the chunked-payload SQL surface introduced in 1.1: +-- * cloudsync_payload_chunks() set-returning function (with the +-- exclude_filter_site_id flag used by the /check download path) +-- * cloudsync_uuid_text() / cloudsync_uuid_blob() UUID conversion helpers +-- +-- Run automatically by: ALTER EXTENSION cloudsync UPDATE; + +CREATE OR REPLACE FUNCTION cloudsync_payload_chunks( + since_db_version bigint DEFAULT NULL, + filter_site_id bytea DEFAULT NULL, + until_db_version bigint DEFAULT NULL, + exclude_filter_site_id boolean DEFAULT false +) +RETURNS TABLE ( + payload bytea, + chunk_index bigint, + payload_size bigint, + rows bigint, + db_version_min bigint, + db_version_max bigint, + watermark_db_version bigint +) +AS 'MODULE_PATHNAME', 'cloudsync_payload_chunks' +LANGUAGE C VOLATILE; + +CREATE OR REPLACE FUNCTION cloudsync_uuid_text(uuid bytea, dash_format boolean DEFAULT true) +RETURNS text +AS 'MODULE_PATHNAME', 'cloudsync_uuid_text' +LANGUAGE C IMMUTABLE; + +CREATE OR REPLACE FUNCTION cloudsync_uuid_blob(uuid text) +RETURNS bytea +AS 'MODULE_PATHNAME', 'cloudsync_uuid_blob' +LANGUAGE C IMMUTABLE; diff --git a/src/sqlite/cloudsync_sqlite.c b/src/sqlite/cloudsync_sqlite.c index 15af3a9..02176e9 100644 --- a/src/sqlite/cloudsync_sqlite.c +++ b/src/sqlite/cloudsync_sqlite.c @@ -136,12 +136,42 @@ void dbsync_seq 
(sqlite3_context *context, int argc, sqlite3_value **argv) { void dbsync_uuid (sqlite3_context *context, int argc, sqlite3_value **argv) { DEBUG_FUNCTION("cloudsync_uuid"); - + char value[UUID_STR_MAXLEN]; char *uuid = cloudsync_uuid_v7_string(value, true); sqlite3_result_text(context, uuid, -1, SQLITE_TRANSIENT); } +// cloudsync_uuid_text(blob, [dash_format]) -> canonical UUID string +void dbsync_uuid_text (sqlite3_context *context, int argc, sqlite3_value **argv) { + DEBUG_FUNCTION("cloudsync_uuid_text"); + + if (sqlite3_value_type(argv[0]) == SQLITE_NULL) { sqlite3_result_null(context); return; } + if (sqlite3_value_type(argv[0]) != SQLITE_BLOB || sqlite3_value_bytes(argv[0]) != UUID_LEN) { + sqlite3_result_error(context, "cloudsync_uuid_text: expected a 16-byte BLOB.", -1); + return; + } + bool dash_format = (argc > 1) ? (sqlite3_value_int(argv[1]) != 0) : true; + char value[UUID_STR_MAXLEN]; + cloudsync_uuid_v7_stringify((uint8_t *)sqlite3_value_blob(argv[0]), value, dash_format); + sqlite3_result_text(context, value, -1, SQLITE_TRANSIENT); +} + +// cloudsync_uuid_blob(text) -> 16-byte UUID blob (accepts dashed/undashed) +void dbsync_uuid_blob (sqlite3_context *context, int argc, sqlite3_value **argv) { + DEBUG_FUNCTION("cloudsync_uuid_blob"); + + if (sqlite3_value_type(argv[0]) == SQLITE_NULL) { sqlite3_result_null(context); return; } + const char *str = (const char *)sqlite3_value_text(argv[0]); + int len = sqlite3_value_bytes(argv[0]); + uint8_t uuid[UUID_LEN]; + if (!str || cloudsync_uuid_v7_parse(str, len, uuid) != 0) { + sqlite3_result_error(context, "cloudsync_uuid_blob: malformed UUID string.", -1); + return; + } + sqlite3_result_blob(context, uuid, UUID_LEN, SQLITE_TRANSIENT); +} + // MARK: - void dbsync_set (sqlite3_context *context, int argc, sqlite3_value **argv) { @@ -1114,7 +1144,7 @@ static int payload_chunks_connect(sqlite3 *db, void *aux, int argc, const char * int rc = sqlite3_declare_vtab(db, "CREATE TABLE x(payload BLOB, chunk_index 
INTEGER, payload_size INTEGER, rows INTEGER, " "db_version_min INTEGER, db_version_max INTEGER, watermark_db_version INTEGER, " - "since_db_version HIDDEN, site_id HIDDEN, until_db_version HIDDEN)"); + "since_db_version HIDDEN, site_id HIDDEN, until_db_version HIDDEN, exclude_filter_site_id HIDDEN)"); if (rc != SQLITE_OK) return rc; cloudsync_payload_chunks_vtab *p = sqlite3_malloc64(sizeof(*p)); if (!p) return SQLITE_NOMEM; @@ -1150,15 +1180,18 @@ static int payload_chunks_best_index(sqlite3_vtab *vtab, sqlite3_index_info *idx UNUSED_PARAMETER(vtab); int argv_index = 1; int idxnum = 0; - for (int i = 0; i < idxinfo->nConstraint; ++i) { - struct sqlite3_index_constraint *cn = &idxinfo->aConstraint[i]; - if (!cn->usable || cn->op != SQLITE_INDEX_CONSTRAINT_EQ) continue; - if (cn->iColumn == 7 || cn->iColumn == 8 || cn->iColumn == 9) { + // Assign argvIndex in canonical hidden-column order (7..10) so xFilter can + // read argv in a fixed order regardless of how SQLite presents constraints. + // Hidden columns: 7=since_db_version, 8=site_id, 9=until_db_version, + // 10=exclude_filter_site_id. 
+ for (int col = 7; col <= 10; ++col) { + for (int i = 0; i < idxinfo->nConstraint; ++i) { + struct sqlite3_index_constraint *cn = &idxinfo->aConstraint[i]; + if (!cn->usable || cn->op != SQLITE_INDEX_CONSTRAINT_EQ || cn->iColumn != col) continue; idxinfo->aConstraintUsage[i].argvIndex = argv_index++; idxinfo->aConstraintUsage[i].omit = 1; - if (cn->iColumn == 7) idxnum |= 1; - if (cn->iColumn == 8) idxnum |= 2; - if (cn->iColumn == 9) idxnum |= 4; + idxnum |= (1 << (col - 7)); + break; // at most one constraint consumed per hidden column } } idxinfo->idxNum = idxnum; @@ -1332,15 +1365,47 @@ static int payload_chunks_filter(sqlite3_vtab_cursor *cursor, int idxnum, const int argi = 0; int64_t since = dbutils_settings_get_int64_value(data, CLOUDSYNC_KEY_SEND_DBVERSION); - const void *site_id = cloudsync_siteid(data); - int site_id_len = UUID_LEN; + const void *site_id = NULL; + int site_id_len = 0; + bool site_id_given = false; int64_t until = 0; + bool exclude = false; if (idxnum & 1) since = sqlite3_value_int64(argv[argi++]); - if (idxnum & 2) { site_id = sqlite3_value_blob(argv[argi]); site_id_len = sqlite3_value_bytes(argv[argi]); argi++; } + if (idxnum & 2) { + if (sqlite3_value_type(argv[argi]) != SQLITE_NULL) { + site_id = sqlite3_value_blob(argv[argi]); + site_id_len = sqlite3_value_bytes(argv[argi]); + site_id_given = true; + } + argi++; + } if (idxnum & 4) until = sqlite3_value_int64(argv[argi++]); + if (idxnum & 8) exclude = (sqlite3_value_int(argv[argi++]) != 0); + + // Resolve the site filter: + // exclude=true -> all sites except filter_site_id (CHECK path); site required + // filter given -> only that site + // default -> local site (send path, unchanged) + const char *site_op; + if (exclude) { + if (!site_id_given) { + c->vtab->base.zErrMsg = sqlite3_mprintf( + "cloudsync_payload_chunks: exclude_filter_site_id requires a non-NULL site_id"); + return SQLITE_ERROR; + } + site_op = "<>"; + } else { + site_op = "="; + if (!site_id_given) { site_id = 
cloudsync_siteid(data); site_id_len = UUID_LEN; } + } + if (until == 0) { + char *mxsql = sqlite3_mprintf( + "SELECT COALESCE(MAX(db_version),0) FROM cloudsync_changes WHERE site_id%s?", site_op); + if (!mxsql) return SQLITE_NOMEM; sqlite3_stmt *mx = NULL; - int rc = sqlite3_prepare_v2(c->vtab->db, "SELECT COALESCE(MAX(db_version),0) FROM cloudsync_changes WHERE site_id=?", -1, &mx, NULL); + int rc = sqlite3_prepare_v2(c->vtab->db, mxsql, -1, &mx, NULL); + sqlite3_free(mxsql); if (rc != SQLITE_OK) return rc; sqlite3_bind_blob(mx, 1, site_id, site_id_len, SQLITE_TRANSIENT); if (sqlite3_step(mx) == SQLITE_ROW) until = sqlite3_column_int64(mx, 0); @@ -1348,9 +1413,13 @@ static int payload_chunks_filter(sqlite3_vtab_cursor *cursor, int idxnum, const } c->watermark = until; - const char *sql = "SELECT tbl, pk, col_name, col_value, col_version, db_version, site_id, cl, seq " - "FROM cloudsync_changes WHERE db_version>? AND site_id=? AND db_version<=? ORDER BY db_version, seq ASC"; + char *sql = sqlite3_mprintf( + "SELECT tbl, pk, col_name, col_value, col_version, db_version, site_id, cl, seq " + "FROM cloudsync_changes WHERE db_version>? AND site_id%s? AND db_version<=? 
ORDER BY db_version, seq ASC", + site_op); + if (!sql) return SQLITE_NOMEM; int rc = sqlite3_prepare_v2(c->vtab->db, sql, -1, &c->src, NULL); + sqlite3_free(sql); if (rc != SQLITE_OK) return rc; sqlite3_bind_int64(c->src, 1, since); sqlite3_bind_blob(c->src, 2, site_id, site_id_len, SQLITE_TRANSIENT); @@ -1782,7 +1851,14 @@ int dbsync_register_functions (sqlite3 *db, char **pzErrMsg) { rc = dbsync_register_function(db, "cloudsync_uuid", dbsync_uuid, 0, pzErrMsg, ctx, NULL); if (rc != SQLITE_OK) return rc; - + + rc = dbsync_register_function(db, "cloudsync_uuid_text", dbsync_uuid_text, 1, pzErrMsg, ctx, NULL); + if (rc != SQLITE_OK) return rc; + rc = dbsync_register_function(db, "cloudsync_uuid_text", dbsync_uuid_text, 2, pzErrMsg, ctx, NULL); + if (rc != SQLITE_OK) return rc; + rc = dbsync_register_function(db, "cloudsync_uuid_blob", dbsync_uuid_blob, 1, pzErrMsg, ctx, NULL); + if (rc != SQLITE_OK) return rc; + // PAYLOAD rc = dbsync_register_aggregate(db, "cloudsync_payload_encode", dbsync_payload_encode_step, dbsync_payload_encode_final, -1, pzErrMsg, ctx, NULL); if (rc != SQLITE_OK) return rc; diff --git a/src/utils.c b/src/utils.c index fff6cdd..371e29c 100644 --- a/src/utils.c +++ b/src/utils.c @@ -112,11 +112,40 @@ char *cloudsync_uuid_v7_stringify (uint8_t uuid[UUID_LEN], char value[UUID_STR_M char *cloudsync_uuid_v7_string (char value[UUID_STR_MAXLEN], bool dash_format) { uint8_t uuid[UUID_LEN]; - + if (cloudsync_uuid_v7(uuid) != 0) return NULL; return cloudsync_uuid_v7_stringify(uuid, value, dash_format); } +static int cloudsync_hex_nibble (char c) { + if (c >= '0' && c <= '9') return c - '0'; + if (c >= 'a' && c <= 'f') return c - 'a' + 10; + if (c >= 'A' && c <= 'F') return c - 'A' + 10; + return -1; +} + +int cloudsync_uuid_v7_parse (const char *str, int len, uint8_t out[UUID_LEN]) { + if (!str || !out) return -1; + if (len < 0) len = (int)strlen(str); + + // Accept the canonical dashed form (8-4-4-4-12) or bare 32-hex; dashes, + // if present, must be 
on a byte boundary, i.e. between hex pairs (the canonical positions are not enforced: any '-' there is skipped). Parse 32 hex nibbles. + int nibbles = 0; + for (int i = 0; i < len; ++i) { + char c = str[i]; + if (c == '-') continue; + int hi = cloudsync_hex_nibble(c); + if (hi < 0) return -1; + if (i + 1 >= len) return -1; + int lo = cloudsync_hex_nibble(str[i + 1]); + if (lo < 0) return -1; + if (nibbles >= UUID_LEN) return -1; + out[nibbles++] = (uint8_t)((hi << 4) | lo); + ++i; // consumed the low nibble too + } + return (nibbles == UUID_LEN) ? 0 : -1; +} + int cloudsync_uuid_v7_compare (uint8_t value1[UUID_LEN], uint8_t value2[UUID_LEN]) { // reconstruct the timestamp by reversing the bit shifts and combining the bytes uint64_t t1 = ((uint64_t)value1[0] << 40) | ((uint64_t)value1[1] << 32) | ((uint64_t)value1[2] << 24) | diff --git a/src/utils.h b/src/utils.h index 3f0e098..f71b1fe 100644 --- a/src/utils.h +++ b/src/utils.h @@ -133,6 +133,10 @@ int cloudsync_uuid_v7 (uint8_t value[UUID_LEN]); int cloudsync_uuid_v7_compare (uint8_t value1[UUID_LEN], uint8_t value2[UUID_LEN]); char *cloudsync_uuid_v7_string (char value[UUID_STR_MAXLEN], bool dash_format); char *cloudsync_uuid_v7_stringify (uint8_t uuid[UUID_LEN], char value[UUID_STR_MAXLEN], bool dash_format); +// Parse a UUID string into its UUID_LEN binary form. Tolerant: accepts the +// canonical dashed form (36 chars) and the bare 32-hex form, case-insensitive. +// Returns 0 on success, -1 on malformed input. 
+int cloudsync_uuid_v7_parse (const char *str, int len, uint8_t out[UUID_LEN]); uint64_t fnv1a_hash(const char *data, size_t len); char *cloudsync_string_replace_prefix(const char *input, char *prefix, char *replacement); diff --git a/test/postgresql/39_payload_chunks.sql b/test/postgresql/52_payload_chunks.sql similarity index 54% rename from test/postgresql/39_payload_chunks.sql rename to test/postgresql/52_payload_chunks.sql index 684cac6..2dec689 100644 --- a/test/postgresql/39_payload_chunks.sql +++ b/test/postgresql/52_payload_chunks.sql @@ -1,18 +1,18 @@ -- Payload chunks and transparent large-value fragmentation -\set testid '39-chunks' +\set testid '52-chunks' \ir helper_test_init.sql \connect postgres \ir helper_psql_conn_setup.sql -DROP DATABASE IF EXISTS cloudsync_test_39_chunks_src; -DROP DATABASE IF EXISTS cloudsync_test_39_chunks_dst; -DROP DATABASE IF EXISTS cloudsync_test_39_chunks_legacy; -CREATE DATABASE cloudsync_test_39_chunks_src; -CREATE DATABASE cloudsync_test_39_chunks_dst; -CREATE DATABASE cloudsync_test_39_chunks_legacy; - -\connect cloudsync_test_39_chunks_src +DROP DATABASE IF EXISTS cloudsync_test_52_chunks_src; +DROP DATABASE IF EXISTS cloudsync_test_52_chunks_dst; +DROP DATABASE IF EXISTS cloudsync_test_52_chunks_legacy; +CREATE DATABASE cloudsync_test_52_chunks_src; +CREATE DATABASE cloudsync_test_52_chunks_dst; +CREATE DATABASE cloudsync_test_52_chunks_legacy; + +\connect cloudsync_test_52_chunks_src \ir helper_psql_conn_setup.sql CREATE EXTENSION IF NOT EXISTS cloudsync; CREATE TABLE payload_chunk_test ( @@ -75,6 +75,68 @@ SELECT (:explicit_arg_chunk_count::int = :chunk_count::int) AS explicit_args_ok SELECT (:fail::int + 1) AS fail \gset \endif +-- exclude_filter_site_id flag: every change here originates from the local +-- site, so excluding it must yield zero chunks while including it yields the +-- full set. This proves the predicate flips between "= site" and "<> site". 
+SELECT count(*) AS excl_local_chunks +FROM cloudsync_payload_chunks(0, cloudsync_siteid(), NULL, true) \gset + +SELECT count(*) AS incl_local_chunks +FROM cloudsync_payload_chunks(0, cloudsync_siteid(), NULL, false) \gset + +SELECT (:excl_local_chunks::int = 0 AND :incl_local_chunks::int > 0) AS exclude_flag_ok \gset +\if :exclude_flag_ok +\echo [PASS] (:testid) exclude_filter_site_id flips the site filter (exclude local -> 0, include -> :incl_local_chunks) +\else +\echo [FAIL] (:testid) exclude_filter_site_id did not flip the filter (exclude=:excl_local_chunks include=:incl_local_chunks) +SELECT (:fail::int + 1) AS fail \gset +\endif + +-- exclude=true without a filter_site_id must raise an error +CREATE TEMP TABLE _excl_err(ok bool); +DO $$ +BEGIN + PERFORM 1 FROM cloudsync_payload_chunks(0, NULL, NULL, true); + INSERT INTO _excl_err VALUES (false); +EXCEPTION WHEN OTHERS THEN + INSERT INTO _excl_err VALUES (true); +END $$; +SELECT ok AS exclude_no_site_errors FROM _excl_err \gset +DROP TABLE _excl_err; +\if :exclude_no_site_errors +\echo [PASS] (:testid) exclude_filter_site_id without a site_id raises an error +\else +\echo [FAIL] (:testid) exclude_filter_site_id without a site_id did not error +SELECT (:fail::int + 1) AS fail \gset +\endif + +-- UUID text<->blob roundtrip, including the /check string-argument path +SELECT ( + cloudsync_uuid_blob(cloudsync_uuid_text(cloudsync_siteid())) = cloudsync_siteid() + AND cloudsync_uuid_blob(cloudsync_uuid_text(cloudsync_siteid(), false)) = cloudsync_siteid() + AND cloudsync_uuid_blob(upper(cloudsync_uuid_text(cloudsync_siteid()))) = cloudsync_siteid() + AND length(cloudsync_uuid_text(cloudsync_siteid())) = 36 + AND length(cloudsync_uuid_text(cloudsync_siteid(), false)) = 32 +) AS uuid_conv_ok \gset +\if :uuid_conv_ok +\echo [PASS] (:testid) cloudsync_uuid_text/cloudsync_uuid_blob roundtrip (dashed, undashed, uppercase) +\else +\echo [FAIL] (:testid) UUID conversion roundtrip mismatch +SELECT (:fail::int + 1) AS fail 
\gset +\endif + +-- A string-derived site_id (as the /check endpoint would pass it) selects the +-- same chunks as the binary site_id. +SELECT count(*) AS str_arg_chunks +FROM cloudsync_payload_chunks(0, cloudsync_uuid_blob(cloudsync_uuid_text(cloudsync_siteid())), NULL, false) \gset +SELECT (:str_arg_chunks::int = :incl_local_chunks::int) AS str_arg_ok \gset +\if :str_arg_ok +\echo [PASS] (:testid) String-derived site_id via cloudsync_uuid_blob matches the binary site_id +\else +\echo [FAIL] (:testid) String-derived site_id mismatch (:str_arg_chunks vs :incl_local_chunks) +SELECT (:fail::int + 1) AS fail \gset +\endif + SELECT md5(string_agg(id || ':' || note || ':' || encode(data, 'hex'), '|' ORDER BY id)) AS src_hash, count(*) AS src_count @@ -97,7 +159,7 @@ SELECT (:legacy_payload_len::int > 262144) AS legacy_payload_large_ok \gset SELECT (:fail::int + 1) AS fail \gset \endif -\connect cloudsync_test_39_chunks_dst +\connect cloudsync_test_52_chunks_dst \ir helper_psql_conn_setup.sql CREATE EXTENSION IF NOT EXISTS cloudsync; CREATE TABLE payload_chunk_test ( @@ -113,6 +175,32 @@ INSERT INTO chunk_transport(ord, payload) SELECT ord::int, decode(chunk_hex, 'hex') FROM unnest(string_to_array(:'chunks_hex', ',')) WITH ORDINALITY AS t(chunk_hex, ord); +-- Stale-fragment GC: on this fresh connection no fragment has been applied yet, +-- so the first applied v3 fragment triggers cleanup of the old incomplete group. 
+CREATE TABLE IF NOT EXISTS cloudsync_payload_fragments ( + value_id TEXT NOT NULL, part_index BIGINT NOT NULL, part_count BIGINT NOT NULL, + total_size BIGINT NOT NULL, checksum TEXT NOT NULL, + created_at BIGINT NOT NULL DEFAULT (EXTRACT(EPOCH FROM now())::bigint), + tbl TEXT NOT NULL, pk BYTEA NOT NULL, col_name TEXT NOT NULL, col_version BIGINT NOT NULL, + db_version BIGINT NOT NULL, site_id BYTEA NOT NULL, cl BIGINT NOT NULL, seq BIGINT NOT NULL, + fragment BYTEA NOT NULL, PRIMARY KEY(value_id, part_index) +); +INSERT INTO cloudsync_payload_fragments +(value_id, part_index, part_count, total_size, checksum, created_at, tbl, pk, col_name, col_version, db_version, site_id, cl, seq, fragment) +VALUES ('stale-incomplete', 0, 2, 10, '0000000000000000', 0, 'payload_chunk_test', '\x01', 'data', 1, 1, decode(repeat('00', 16), 'hex'), 1, 1, '\x00'); + +SELECT cloudsync_payload_apply(payload) AS stale_cleanup_apply +FROM chunk_transport WHERE get_byte(payload, 4) = 3 ORDER BY ord LIMIT 1 \gset + +SELECT (COUNT(*) = 0) AS stale_cleanup_ok +FROM cloudsync_payload_fragments WHERE value_id = 'stale-incomplete' \gset +\if :stale_cleanup_ok +\echo [PASS] (:testid) Stale incomplete fragment cleanup works (first apply on a fresh connection) +\else +\echo [FAIL] (:testid) Stale incomplete fragment cleanup failed +SELECT (:fail::int + 1) AS fail \gset +\endif + SELECT coalesce(sum(cloudsync_payload_apply(payload)), 0) AS chunk_apply_rows FROM (SELECT payload FROM chunk_transport ORDER BY ord DESC) AS ordered_chunks \gset @@ -129,46 +217,26 @@ SELECT (:'dst_hash' = :'src_hash' AND :dst_count::int = :src_count::int) AS chun SELECT (:fail::int + 1) AS fail \gset \endif -CREATE TABLE IF NOT EXISTS cloudsync_payload_fragments ( - value_id TEXT NOT NULL, - part_index BIGINT NOT NULL, - part_count BIGINT NOT NULL, - total_size BIGINT NOT NULL, - checksum TEXT NOT NULL, - created_at BIGINT NOT NULL DEFAULT (EXTRACT(EPOCH FROM now())::bigint), - tbl TEXT NOT NULL, - pk BYTEA NOT NULL, - 
col_name TEXT NOT NULL, - col_version BIGINT NOT NULL, - db_version BIGINT NOT NULL, - site_id BYTEA NOT NULL, - cl BIGINT NOT NULL, - seq BIGINT NOT NULL, - fragment BYTEA NOT NULL, - PRIMARY KEY(value_id, part_index) -); +-- Throttle check: the full apply above already ran the stale GC, so an old +-- group inserted now must NOT be removed by the next immediate fragment apply. +-- This proves the GC is not re-scanned on every applied fragment (the O(n^2) fix). INSERT INTO cloudsync_payload_fragments (value_id, part_index, part_count, total_size, checksum, created_at, tbl, pk, col_name, col_version, db_version, site_id, cl, seq, fragment) -VALUES ('stale-incomplete', 0, 2, 10, '0000000000000000', 0, 'payload_chunk_test', '\x01', 'data', 1, 1, decode(repeat('00', 16), 'hex'), 1, 1, '\x00'); +VALUES ('stale-incomplete-2', 0, 2, 10, '0000000000000000', 0, 'payload_chunk_test', '\x02', 'data', 1, 1, decode(repeat('00', 16), 'hex'), 1, 1, '\x00'); -SELECT cloudsync_payload_apply(payload) AS stale_cleanup_apply -FROM chunk_transport -WHERE get_byte(payload, 4) = 3 -ORDER BY ord -LIMIT 1 \gset - -SELECT (COUNT(*) = 0) AS stale_cleanup_ok -FROM cloudsync_payload_fragments -WHERE value_id = 'stale-incomplete' \gset +SELECT cloudsync_payload_apply(payload) AS throttle_apply +FROM chunk_transport WHERE get_byte(payload, 4) = 3 ORDER BY ord LIMIT 1 \gset -\if :stale_cleanup_ok -\echo [PASS] (:testid) Stale incomplete fragment cleanup works +SELECT (COUNT(*) = 1) AS stale_throttle_ok +FROM cloudsync_payload_fragments WHERE value_id = 'stale-incomplete-2' \gset +\if :stale_throttle_ok +\echo [PASS] (:testid) Stale GC is throttled (not re-run on every applied fragment) \else -\echo [FAIL] (:testid) Stale incomplete fragment cleanup failed +\echo [FAIL] (:testid) Stale GC was not throttled (removed a fresh-inserted old group on the next apply) SELECT (:fail::int + 1) AS fail \gset \endif -\connect cloudsync_test_39_chunks_legacy +\connect cloudsync_test_52_chunks_legacy \ir 
helper_psql_conn_setup.sql CREATE EXTENSION IF NOT EXISTS cloudsync; CREATE TABLE payload_chunk_test ( @@ -195,7 +263,7 @@ SELECT (:fail::int + 1) AS fail \gset \ir helper_test_cleanup.sql \if :should_cleanup -DROP DATABASE IF EXISTS cloudsync_test_39_chunks_src; -DROP DATABASE IF EXISTS cloudsync_test_39_chunks_dst; -DROP DATABASE IF EXISTS cloudsync_test_39_chunks_legacy; +DROP DATABASE IF EXISTS cloudsync_test_52_chunks_src; +DROP DATABASE IF EXISTS cloudsync_test_52_chunks_dst; +DROP DATABASE IF EXISTS cloudsync_test_52_chunks_legacy; \endif diff --git a/test/postgresql/full_test.sql b/test/postgresql/full_test.sql index 1d2e492..42dfbbf 100644 --- a/test/postgresql/full_test.sql +++ b/test/postgresql/full_test.sql @@ -46,7 +46,6 @@ \ir 36_block_lww_round3.sql \ir 37_block_lww_round4.sql \ir 38_block_lww_round5.sql -\ir 39_payload_chunks.sql \ir 39_concurrent_write_apply.sql \ir 40_unsupported_algorithms.sql \ir 41_corrupted_payload.sql @@ -60,6 +59,7 @@ \ir 49_row_filter_prefill.sql \ir 50_block_lww_existing_data.sql \ir 51_stale_table_settings_dropped_meta.sql +\ir 52_payload_chunks.sql -- 'Test summary' \echo '\nTest summary:' diff --git a/test/unit.c b/test/unit.c index 3bd74cd..331f0b8 100644 --- a/test/unit.c +++ b/test/unit.c @@ -12091,6 +12091,13 @@ bool do_test_payload_chunks_large_values (bool print_result, bool cleanup_databa if (!test_payload_chunks_tables_equal(db[0], db[1])) goto finalize; if (first_v3_chunk < 0) goto finalize; + + // Reopen db[1] so the v3-fragment stale-GC throttle starts fresh on this + // connection: the first fragment applied below runs the stale cleanup. 
+ close_db(db[1]); + db[1] = do_create_database_file(1, timestamp, saved_counter); + if (!db[1]) goto finalize; + rc = sqlite3_exec(db[1], "CREATE TABLE IF NOT EXISTS cloudsync_payload_fragments (" "value_id TEXT NOT NULL, part_index INTEGER NOT NULL, part_count INTEGER NOT NULL, total_size INTEGER NOT NULL, " @@ -12104,14 +12111,15 @@ bool do_test_payload_chunks_large_values (bool print_result, bool cleanup_databa NULL, NULL, NULL); if (rc != SQLITE_OK) goto finalize; + // First fragment apply on the fresh connection -> stale GC runs -> removed. rc = sqlite3_prepare_v2(db[1], "SELECT cloudsync_payload_apply(?);", -1, &apply, NULL); if (rc != SQLITE_OK) goto finalize; rc = sqlite3_bind_blob(apply, 1, chunks[first_v3_chunk].data, chunks[first_v3_chunk].len, SQLITE_STATIC); if (rc != SQLITE_OK) goto finalize; rc = sqlite3_step(apply); if (rc != SQLITE_ROW) goto finalize; - sqlite3_finalize(apply); - apply = NULL; + sqlite3_reset(apply); + sqlite3_clear_bindings(apply); rc = sqlite3_prepare_v2(db[1], "SELECT COUNT(*) FROM cloudsync_payload_fragments WHERE value_id='stale-incomplete';", -1, &stmt, NULL); if (rc != SQLITE_OK) goto finalize; @@ -12120,6 +12128,29 @@ bool do_test_payload_chunks_large_values (bool print_result, bool cleanup_databa sqlite3_finalize(stmt); stmt = NULL; + // Throttle check: an old group inserted now must NOT be removed by the very + // next fragment apply, because the GC just ran (within the throttle window). + // This proves the cleanup is not re-scanned on every applied fragment. 
+ rc = sqlite3_exec(db[1], + "INSERT OR REPLACE INTO cloudsync_payload_fragments " + "(value_id, part_index, part_count, total_size, checksum, created_at, tbl, pk, col_name, col_version, db_version, site_id, cl, seq, fragment) " + "VALUES ('stale-incomplete-2', 0, 2, 10, '0000000000000000', 0, 'payload_chunk_test', x'02', 'data', 1, 1, zeroblob(16), 1, 1, x'00');", + NULL, NULL, NULL); + if (rc != SQLITE_OK) goto finalize; + rc = sqlite3_bind_blob(apply, 1, chunks[first_v3_chunk].data, chunks[first_v3_chunk].len, SQLITE_STATIC); + if (rc != SQLITE_OK) goto finalize; + rc = sqlite3_step(apply); + if (rc != SQLITE_ROW) goto finalize; + sqlite3_finalize(apply); + apply = NULL; + + rc = sqlite3_prepare_v2(db[1], "SELECT COUNT(*) FROM cloudsync_payload_fragments WHERE value_id='stale-incomplete-2';", -1, &stmt, NULL); + if (rc != SQLITE_OK) goto finalize; + rc = sqlite3_step(stmt); + if (rc != SQLITE_ROW || sqlite3_column_int(stmt, 0) != 1) goto finalize; // throttled -> still present + sqlite3_finalize(stmt); + stmt = NULL; + // The legacy aggregate API must still produce an applyable monolithic payload. rc = sqlite3_prepare_v2(db[0], "SELECT cloudsync_payload_encode(tbl, pk, col_name, col_value, col_version, db_version, site_id, cl, seq) " @@ -12172,6 +12203,123 @@ bool do_test_payload_chunks_large_values (bool print_result, bool cleanup_databa return result; } +// Apply every payload chunk produced by `q` (column 0 = payload blob, run on +// src) into `dst`. Returns SQLITE_OK on success. 
+static int test_chunks_apply_all (sqlite3_stmt *q, sqlite3 *dst) { + sqlite3_stmt *apply = NULL; + int rc = sqlite3_prepare_v2(dst, "SELECT cloudsync_payload_apply(?);", -1, &apply, NULL); + if (rc != SQLITE_OK) return rc; + while ((rc = sqlite3_step(q)) == SQLITE_ROW) { + sqlite3_bind_blob(apply, 1, sqlite3_column_blob(q, 0), sqlite3_column_bytes(q, 0), SQLITE_STATIC); + if (sqlite3_step(apply) != SQLITE_ROW) { sqlite3_finalize(apply); return SQLITE_ERROR; } + sqlite3_reset(apply); + sqlite3_clear_bindings(apply); + } + sqlite3_finalize(apply); + return (rc == SQLITE_DONE) ? SQLITE_OK : rc; +} + +// Verify table t holds exactly the ids in expected_csv (sorted, '|'-joined). +static bool test_chunks_ids_equal (sqlite3 *db, const char *expected_csv) { + sqlite3_stmt *s = NULL; + if (sqlite3_prepare_v2(db, "SELECT coalesce(group_concat(id, '|'), '') FROM (SELECT id FROM t ORDER BY id);", -1, &s, NULL) != SQLITE_OK) return false; + bool ok = false; + if (sqlite3_step(s) == SQLITE_ROW) { + const char *v = (const char *)sqlite3_column_text(s, 0); + ok = v && strcmp(v, expected_csv) == 0; + } + sqlite3_finalize(s); + return ok; +} + +bool do_test_payload_chunks_site_exclusion (bool print_result, bool cleanup_databases) { + sqlite3 *db[4] = {NULL, NULL, NULL, NULL}; + sqlite3_stmt *stmt = NULL; + bool result = false; + int rc = SQLITE_OK; + unsigned char s1[16] = {0}; + + time_t timestamp = time(NULL); + int saved_counter = test_counter++; + + for (int i = 0; i < 4; ++i) { + db[i] = do_create_database_file(i, timestamp, saved_counter); + if (!db[i]) goto finalize; + rc = sqlite3_exec(db[i], + "CREATE TABLE t (id TEXT PRIMARY KEY, note TEXT DEFAULT '');" + "SELECT cloudsync_init('t');", NULL, NULL, NULL); + if (rc != SQLITE_OK) goto finalize; + } + + // db[0] = local site S0 changes; db[1] = remote site S1 changes + rc = sqlite3_exec(db[0], "INSERT INTO t(id, note) VALUES ('a0','n'),('b0','n');", NULL, NULL, NULL); + if (rc != SQLITE_OK) goto finalize; + rc = 
sqlite3_exec(db[1], "INSERT INTO t(id, note) VALUES ('a1','n'),('b1','n');", NULL, NULL, NULL); + if (rc != SQLITE_OK) goto finalize; + + // capture S1 (db[1]'s site id) + if (sqlite3_prepare_v2(db[1], "SELECT cloudsync_siteid();", -1, &stmt, NULL) != SQLITE_OK) goto finalize; + if (sqlite3_step(stmt) != SQLITE_ROW || sqlite3_column_bytes(stmt, 0) != 16) goto finalize; + memcpy(s1, sqlite3_column_blob(stmt, 0), 16); + sqlite3_finalize(stmt); stmt = NULL; + + // transfer db[1]'s changes into db[0] (now db[0] has S0 and S1 changes) + if (sqlite3_prepare_v2(db[1], "SELECT payload FROM cloudsync_payload_chunks WHERE since_db_version=0 ORDER BY chunk_index;", -1, &stmt, NULL) != SQLITE_OK) goto finalize; + if (test_chunks_apply_all(stmt, db[0]) != SQLITE_OK) goto finalize; + sqlite3_finalize(stmt); stmt = NULL; + if (!test_chunks_ids_equal(db[0], "a0|a1|b0|b1")) goto finalize; + + // exclude S1 -> only S0 changes (a0,b0) into db[2] + if (sqlite3_prepare_v2(db[0], "SELECT payload FROM cloudsync_payload_chunks WHERE since_db_version=0 AND site_id=? AND exclude_filter_site_id=1 ORDER BY chunk_index;", -1, &stmt, NULL) != SQLITE_OK) goto finalize; + sqlite3_bind_blob(stmt, 1, s1, 16, SQLITE_STATIC); + if (test_chunks_apply_all(stmt, db[2]) != SQLITE_OK) goto finalize; + sqlite3_finalize(stmt); stmt = NULL; + if (!test_chunks_ids_equal(db[2], "a0|b0")) goto finalize; + + // inclusive filter S1 -> only S1 changes (a1,b1) into db[3] + if (sqlite3_prepare_v2(db[0], "SELECT payload FROM cloudsync_payload_chunks WHERE since_db_version=0 AND site_id=? 
ORDER BY chunk_index;", -1, &stmt, NULL) != SQLITE_OK) goto finalize; + sqlite3_bind_blob(stmt, 1, s1, 16, SQLITE_STATIC); + if (test_chunks_apply_all(stmt, db[3]) != SQLITE_OK) goto finalize; + sqlite3_finalize(stmt); stmt = NULL; + if (!test_chunks_ids_equal(db[3], "a1|b1")) goto finalize; + + // exclude=true without a site_id must error + if (sqlite3_prepare_v2(db[0], "SELECT payload FROM cloudsync_payload_chunks WHERE exclude_filter_site_id=1;", -1, &stmt, NULL) != SQLITE_OK) goto finalize; + if (sqlite3_step(stmt) != SQLITE_ERROR) goto finalize; // expected: xFilter raises an error + sqlite3_finalize(stmt); stmt = NULL; + + // UUID text<->blob roundtrip (dashed and undashed) must recover S1 + if (sqlite3_prepare_v2(db[0], + "SELECT cloudsync_uuid_blob(cloudsync_uuid_text(?1)) = ?1 " + "AND cloudsync_uuid_blob(cloudsync_uuid_text(?1, 0)) = ?1 " + "AND length(cloudsync_uuid_text(?1)) = 36 " + "AND length(cloudsync_uuid_text(?1, 0)) = 32;", -1, &stmt, NULL) != SQLITE_OK) goto finalize; + sqlite3_bind_blob(stmt, 1, s1, 16, SQLITE_STATIC); + if (sqlite3_step(stmt) != SQLITE_ROW || sqlite3_column_int(stmt, 0) != 1) goto finalize; + sqlite3_finalize(stmt); stmt = NULL; + + result = true; + +finalize: + if (!result && print_result) { + printf("do_test_payload_chunks_site_exclusion error: %s\n", db[0] ? 
sqlite3_errmsg(db[0]) : "no db"); + } + if (stmt) sqlite3_finalize(stmt); + for (int i = 0; i < 4; ++i) if (db[i]) close_db(db[i]); + if (cleanup_databases) { + for (int i = 0; i < 4; ++i) { + char path[256], walpath[300], shmpath[300]; + do_build_database_path(path, i, timestamp, saved_counter); + snprintf(walpath, sizeof(walpath), "%s-wal", path); + snprintf(shmpath, sizeof(shmpath), "%s-shm", path); + file_delete_internal(path); + file_delete_internal(walpath); + file_delete_internal(shmpath); + } + } + return result; +} + bool do_test_payload_idempotency (int nclients, bool print_result, bool cleanup_databases) { sqlite3 *db[2] = {NULL, NULL}; bool result = false; @@ -12620,6 +12768,7 @@ int main (int argc, const char * argv[]) { result += test_report("Payload Buffer Test (1MB):", do_test_payload_buffer(1024 * 1024)); result += test_report("Payload Buffer Test (10MB):", do_test_payload_buffer(10 * 1024 * 1024)); result += test_report("Payload Chunks Large Values:", do_test_payload_chunks_large_values(print_result, cleanup_databases)); + result += test_report("Payload Chunks Site Exclusion:", do_test_payload_chunks_site_exclusion(print_result, cleanup_databases)); // close local database close_db(db);