From 13cb365222fbdbbc1ae9c83c68936301b47208ab Mon Sep 17 00:00:00 2001 From: Greg Lamberson Date: Thu, 21 May 2026 12:28:08 -0500 Subject: [PATCH 1/2] PG-2311: Make sync.c syncsw[] extensible via register_sync_handler() Introduce a public extension API, register_sync_handler(), that lets extensions install their own entries in the sync.c dispatch table. This enables storage-related extensions to participate in the checkpoint fsync pipeline without faking md.c segments or bypassing sync.c's request coalescing and cancellation machinery. The previously static syncsw[] array becomes a heap-allocated dispatch table populated in two phases: the five built-in handlers (MD, CLOG, commit_ts, multixact_offset, multixact_member) are registered via InitSyncHandlers() before process_shared_preload_libraries(), and extension _PG_init() calls receive sequentially assigned IDs starting at SYNC_HANDLER_FIRST_DYNAMIC. Registration is forbidden after process_shared_preload_libraries_done is set. InitSyncHandlers() is called from both PostmasterMain() (for the fork() path) and from register_sync_handler() itself (for the EXEC_BACKEND path, where each child re-runs shared_preload_libraries in its own address space and may reach an extension's registration call before it has called InitSync()). An explicit builtin_sync_handlers_registered flag guards against repeated built-in registration. SYNC_HANDLER_NONE is changed from its previous implicit value of 5 to an explicit -1 so that the "no handler" sentinel cannot be confused with a valid handler index. The only consumers in core are value-agnostic != comparisons in slru.c. Documentation: doc/src/sgml/custom-sync-handler.sgml, modeled on doc/src/sgml/custom-rmgr.sgml. This patch is being filed against PSP_REL_18_STABLE as the companion to the upstream submission at CF 6689 (commitfest.postgresql.org/patch/6689/). The patch applies cleanly to PSP_REL_18_STABLE since storage/sync/sync.c and storage/sync.h have no Percona-specific deltas; the only manual adjustment required during application was reflowing the new include and the InitSyncHandlers() call into the Percona-specific surrounding structure in postmaster.c (which adds register_builtin_dynamic_managers() between the upstream insertion point and process_shared_preload_libraries()). Discussion: https://postgr.es/m/IA1PR07MB983072521EE7FDEE98902534A9592@IA1PR07MB9830.namprd07.prod.outlook.com Jira: https://perconadev.atlassian.net/browse/PG-2311 --- doc/src/sgml/custom-sync-handler.sgml | 118 +++++++++++ doc/src/sgml/filelist.sgml | 1 + doc/src/sgml/postgres.sgml | 1 + src/backend/postmaster/postmaster.c | 11 + src/backend/storage/sync/sync.c | 278 ++++++++++++++++++++++---- src/include/storage/sync.h | 64 +++++- 6 files changed, 432 insertions(+), 41 deletions(-) create mode 100644 doc/src/sgml/custom-sync-handler.sgml diff --git a/doc/src/sgml/custom-sync-handler.sgml b/doc/src/sgml/custom-sync-handler.sgml new file mode 100644 index 0000000000000..6d95efe7440d9 --- /dev/null +++ b/doc/src/sgml/custom-sync-handler.sgml @@ -0,0 +1,118 @@ + + + + Custom Sync Handlers for Extensions + + + This chapter explains the interface between the core + PostgreSQL system and custom sync handlers, + which enable extensions to participate in the checkpoint + fsync pipeline implemented in + src/backend/storage/sync/sync.c. + + + + Extensions that manage storage outside the standard relation-file layout, + such as a Table Access Method that stores + its data in a non-file format, may need their data to be + fsynced at checkpoint time in the same manner as the + built-in handlers do for relation segments, CLOG, + commit_ts, and multixact data. A custom sync + handler lets an extension register its own sync callback, participate in + the same request coalescing and cancellation mechanisms, and benefit from + the checkpointer's batching and cycle_ctr semantics + without reimplementing the machinery or faking its data as + md.c segments. + + + + To create a custom sync handler, first define a + SyncOps structure containing the handler + callbacks. The structure is defined in + src/include/storage/sync.h: + +typedef struct SyncOps +{ + int (*sync_syncfiletag) (const FileTag *ftag, char *path); + int (*sync_unlinkfiletag) (const FileTag *ftag, char *path); + bool (*sync_filetagmatches) (const FileTag *ftag, + const FileTag *candidate); +} SyncOps; + + Only sync_syncfiletag is required; the other + two pointers may be NULL if the handler does not + participate in SYNC_UNLINK_REQUEST or + SYNC_FILTER_REQUEST flows. This mirrors the built-in + handlers for CLOG, commit_ts, + and multixact data, which only define + sync_syncfiletag. + + + + Then, register the handler and record the returned handler ID: + +extern int16 register_sync_handler(const SyncOps *ops, const char *name); + + register_sync_handler must be called from the + extension module's _PG_init + function while shared_preload_libraries is still being + loaded; calls made after that phase has completed raise + FATAL. The extension must therefore be placed in + . + + + + The returned int16 handler ID is the value the extension + stores in FileTag.handler when queuing sync + requests via RegisterSyncRequest. Extension handler + IDs are assigned sequentially starting at + SYNC_HANDLER_FIRST_DYNAMIC, which is the first value + after the built-in handler IDs SYNC_HANDLER_MD, + SYNC_HANDLER_CLOG, + SYNC_HANDLER_COMMIT_TS, + SYNC_HANDLER_MULTIXACT_OFFSET, and + SYNC_HANDLER_MULTIXACT_MEMBER. The assigned ID is + stable for the lifetime of a given server configuration, that is, it + does not change between backends, the checkpointer, or auxiliary + processes within a single postmaster lifetime. Because sync requests + live only in the checkpointer's in-memory pending-operations hash table + and are not persisted across server restarts, the assigned ID does not + need to be stable across restarts or across changes to + shared_preload_libraries. + + + + The FileTag structure passed to the handler + callbacks has a small fixed layout that all handlers share. Its + contents are opaque to sync.c; each handler + interprets the fields according to its own convention. Because + sync.c deduplicates pending sync requests by + hashing the raw bytes of the FileTag + (HASH_BLOBS), every field including any padding + must be zeroed before the structure is populated, otherwise logically + identical tags with different padding bytes will not coalesce into a + single callback invocation. A simple memset to + zero before assignment is sufficient. + + + + The src/test/modules/test_sync_handler module + contains a minimal working example, which demonstrates registration + from _PG_init, the per-checkpoint callback + dispatch, request coalescing via HASH_BLOBS, and + the cycle_ctr skip behaviour on idle checkpoints. + The TAP test in that module also serves as a copy-paste starting point + for new sync-handler extensions. + + + + + The extension must remain in shared_preload_libraries + as long as any data managed by its sync handler may require + checkpointing. If the extension is removed while such data exists, + PostgreSQL will not be able to dispatch + pending sync requests for that data, which may lead to durability + issues at the next checkpoint. + + + diff --git a/doc/src/sgml/filelist.sgml b/doc/src/sgml/filelist.sgml index fef9584f908ec..ad0fe7bfdacc1 100644 --- a/doc/src/sgml/filelist.sgml +++ b/doc/src/sgml/filelist.sgml @@ -110,6 +110,7 @@ + diff --git a/doc/src/sgml/postgres.sgml b/doc/src/sgml/postgres.sgml index af476c82fcc1e..98ab44b3d21a7 100644 --- a/doc/src/sgml/postgres.sgml +++ b/doc/src/sgml/postgres.sgml @@ -259,6 +259,7 @@ break is not needed in a wider output rendering. &tableam; &indexam; &wal-for-extensions; + &custom-sync-handler; &indextypes; &storage; &transaction; diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index e5e446715e6bd..d41a4100ecd7f 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -115,6 +115,7 @@ #include "storage/ipc.h" #include "storage/pmsignal.h" #include "storage/proc.h" +#include "storage/sync.h" #include "tcop/backend_startup.h" #include "tcop/tcopprot.h" #include "utils/datetime.h" @@ -932,6 +933,16 @@ PostmasterMain(int argc, char *argv[]) */ register_builtin_dynamic_managers(); + /* + * Register the built-in sync handlers (md, CLOG, commit_ts, + * multixact_offset, multixact_member). This must happen before + * process_shared_preload_libraries() so that extensions which + * call register_sync_handler() from their _PG_init() receive IDs + * starting at SYNC_HANDLER_FIRST_DYNAMIC instead of colliding + * with the built-in slots. + */ + InitSyncHandlers(); + /* * process any libraries that should be preloaded at postmaster start */ diff --git a/src/backend/storage/sync/sync.c b/src/backend/storage/sync/sync.c index fc16db90133bb..1fcd6b9d1b773 100644 --- a/src/backend/storage/sync/sync.c +++ b/src/backend/storage/sync/sync.c @@ -79,50 +79,219 @@ static CycleCtr checkpoint_cycle_ctr = 0; #define UNLINKS_PER_ABSORB 10 /* - * Function pointers for handling sync and unlink requests. + * Sync handler dispatch table. + * + * Populated by InitSyncHandlers() for the five built-in handlers (MD, + * CLOG, commit_ts, multixact_offset, multixact_member) and by + * register_sync_handler() for handlers installed by extensions from + * their _PG_init() function. After shared_preload_libraries has + * finished loading, syncsw[] is effectively immutable for the life + * of the process. + * + * Every process that can call into sync.c (postmaster, backends, and + * the checkpointer and other auxiliary processes) obtains its own + * populated syncsw[] either by inheriting it via fork() from the + * postmaster, or, on EXEC_BACKEND platforms where there is no fork(), + * by re-running InitSyncHandlers() and process_shared_preload_libraries() + * in its own address space during startup. + * + * SyncOps itself is defined in sync.h so that extensions can declare + * const SyncOps instances at file scope. */ -typedef struct SyncOps -{ - int (*sync_syncfiletag) (const FileTag *ftag, char *path); - int (*sync_unlinkfiletag) (const FileTag *ftag, char *path); - bool (*sync_filetagmatches) (const FileTag *ftag, - const FileTag *candidate); -} SyncOps; +static SyncOps *syncsw = NULL; +static const char **sync_handler_names = NULL; +static int NSyncHandlers = 0; +static int sync_handlers_capacity = 0; +static bool builtin_sync_handlers_registered = false; /* - * These indexes must correspond to the values of the SyncRequestHandler enum. + * Built-in SyncOps, registered in enum order during InitSync() so that + * SYNC_HANDLER_MD == 0, SYNC_HANDLER_CLOG == 1, etc. */ -static const SyncOps syncsw[] = { - /* magnetic disk */ - [SYNC_HANDLER_MD] = { - .sync_syncfiletag = mdsyncfiletag, - .sync_unlinkfiletag = mdunlinkfiletag, - .sync_filetagmatches = mdfiletagmatches - }, - /* pg_xact */ - [SYNC_HANDLER_CLOG] = { - .sync_syncfiletag = clogsyncfiletag - }, - /* pg_commit_ts */ - [SYNC_HANDLER_COMMIT_TS] = { - .sync_syncfiletag = committssyncfiletag - }, - /* pg_multixact/offsets */ - [SYNC_HANDLER_MULTIXACT_OFFSET] = { - .sync_syncfiletag = multixactoffsetssyncfiletag - }, - /* pg_multixact/members */ - [SYNC_HANDLER_MULTIXACT_MEMBER] = { - .sync_syncfiletag = multixactmemberssyncfiletag - } +static const SyncOps builtin_md_ops = { + .sync_syncfiletag = mdsyncfiletag, + .sync_unlinkfiletag = mdunlinkfiletag, + .sync_filetagmatches = mdfiletagmatches, +}; +static const SyncOps builtin_clog_ops = { + .sync_syncfiletag = clogsyncfiletag, +}; +static const SyncOps builtin_committs_ops = { + .sync_syncfiletag = committssyncfiletag, +}; +static const SyncOps builtin_multixact_offset_ops = { + .sync_syncfiletag = multixactoffsetssyncfiletag, +}; +static const SyncOps builtin_multixact_member_ops = { + .sync_syncfiletag = multixactmemberssyncfiletag, }; +/* + * Internal helper that adds an entry to syncsw[] without performing the + * preload-phase check. Used by InitSync() to install the built-in + * handlers, which must be present in every process that calls into + * sync.c (including the checkpointer, which runs after + * shared_preload_libraries has finished loading). + */ +static int16 +sync_handler_register_internal(const SyncOps *ops, const char *name) +{ + int16 my_id; + MemoryContext old; + + if (ops == NULL || ops->sync_syncfiletag == NULL) + elog(FATAL, "sync handler registration requires a non-NULL sync callback"); + + if (name == NULL || *name == '\0') + elog(FATAL, "sync handler name must not be empty"); + + if (NSyncHandlers >= SYNC_HANDLER_MAX) + ereport(FATAL, + (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED), + errmsg("too many sync handlers registered (maximum is %d)", + SYNC_HANDLER_MAX))); + + old = MemoryContextSwitchTo(TopMemoryContext); + + if (NSyncHandlers >= sync_handlers_capacity) + { + int new_cap = (sync_handlers_capacity == 0) + ? 8 + : sync_handlers_capacity * 2; + + if (new_cap > SYNC_HANDLER_MAX) + new_cap = SYNC_HANDLER_MAX; + + if (syncsw == NULL) + { + syncsw = palloc(sizeof(SyncOps) * new_cap); + sync_handler_names = palloc(sizeof(char *) * new_cap); + } + else + { + syncsw = repalloc(syncsw, sizeof(SyncOps) * new_cap); + sync_handler_names = repalloc(sync_handler_names, + sizeof(char *) * new_cap); + } + sync_handlers_capacity = new_cap; + } + + my_id = (int16) NSyncHandlers++; + memcpy(&syncsw[my_id], ops, sizeof(SyncOps)); + sync_handler_names[my_id] = pstrdup(name); + + MemoryContextSwitchTo(old); + + /* + * No barrier needed: registration only happens during + * shared_preload_libraries load, which is single-threaded. On fork-based + * platforms backends and auxiliary processes inherit the fully-populated + * array from the postmaster via fork(). On EXEC_BACKEND platforms each + * child repeats the single-threaded registration sequence in its own + * address space during startup. In either case, the array is immutable by + * the time any concurrent reader can observe it. + */ + return my_id; +} + +/* + * Public registration entry point for extensions. See sync.h for the + * contract. + * + * Extensions must call this from their _PG_init() while + * shared_preload_libraries is still being processed; later calls raise + * FATAL. Built-in handlers bypass this guard via + * sync_handler_register_internal() because the checkpointer and other + * auxiliary processes call InitSync() after preload has finished, and + * the built-in dispatch table must still be populated in those + * processes. + * + * On EXEC_BACKEND platforms each child process repeats + * process_shared_preload_libraries() in its own fresh address space + * during startup, and an extension's _PG_init() can reach this + * function before the child has called InitSync(). Call + * InitSyncHandlers() here to ensure the five built-in handlers always + * occupy IDs 0..SYNC_HANDLER_FIRST_DYNAMIC-1 before any dynamic ID is + * assigned, which keeps handler IDs consistent across every process + * that dispatches sync requests. InitSyncHandlers() is idempotent. + */ +int16 +register_sync_handler(const SyncOps *ops, const char *name) +{ + if (process_shared_preload_libraries_done) + ereport(FATAL, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("sync handler registration is only permitted while shared_preload_libraries is being loaded"))); + + InitSyncHandlers(); + + return sync_handler_register_internal(ops, name); +} + +/* + * Register the built-in sync handlers. + * + * This MUST run before any call to register_sync_handler() from + * extension _PG_init() code, so that the built-in handlers occupy + * their canonical IDs (SYNC_HANDLER_MD = 0, SYNC_HANDLER_CLOG = 1, + * etc.) and extension handlers are assigned IDs >= + * SYNC_HANDLER_FIRST_DYNAMIC. + * + * Called from: + * - PostmasterMain(), just before process_shared_preload_libraries() + * - AuxiliaryProcessMain() (not currently needed because aux procs + * fork from the postmaster with syncsw[] already populated, but + * see the idempotent NSyncHandlers==0 guard below) + * - Standalone backend init (via InitSync -> InitSyncHandlers) + * + * Idempotent: the NSyncHandlers == 0 guard ensures built-ins are + * registered exactly once per process. Safe to call from multiple + * init paths. + */ +void +InitSyncHandlers(void) +{ + if (builtin_sync_handlers_registered) + return; + + (void) sync_handler_register_internal(&builtin_md_ops, "md"); + (void) sync_handler_register_internal(&builtin_clog_ops, "clog"); + (void) sync_handler_register_internal(&builtin_committs_ops, "commit_ts"); + (void) sync_handler_register_internal(&builtin_multixact_offset_ops, + "multixact_offset"); + (void) sync_handler_register_internal(&builtin_multixact_member_ops, + "multixact_member"); + + builtin_sync_handlers_registered = true; + + /* + * Enforce the enum-to-count invariant: if a new built-in is added to the + * SyncRequestHandler enum, the build will fail-fast at first boot until a + * matching sync_handler_register_internal() call is added here. + */ + Assert(NSyncHandlers == SYNC_HANDLER_FIRST_DYNAMIC); +} + /* * Initialize data structures for the file sync tracking. + * + * This runs in processes that actually need the pendingOps hash table + * (standalone backends and the checkpointer). It also calls + * InitSyncHandlers() defensively in case this process reached here + * without the postmaster having done so, e.g., standalone mode. */ void InitSync(void) { + /* + * Make sure built-in handlers are registered. In the postmaster, this was + * already called from PostmasterMain() before + * process_shared_preload_libraries(); in standalone mode it is called + * here for the first (and only) time. The NSyncHandlers guard inside + * InitSyncHandlers() makes it idempotent. + */ + InitSyncHandlers(); + /* * Create pending-operations hashtable if we need it. Currently, we need * it if we are standalone (not under a postmaster) or if we are a @@ -204,6 +373,19 @@ SyncPostCheckpoint(void) int absorb_counter; ListCell *lc; + /* + * Cache the syncsw base pointer in a local for the duration of this + * function. Without this, the compiler cannot hoist the load of the + * mutable static pointer out of the dispatch loop, and each dispatch + * costs an extra memory load plus an address-materialization LEA + * (verified with objdump on GCC 14.2 -O2). With the local cached, the + * per-entry dispatch compiles down to identical assembly as the pre-patch + * static-const array. Safe because register_sync_handler() is forbidden + * after process_shared_preload_libraries_done and syncsw is never mutated + * outside registration. + */ + SyncOps *ops = syncsw; + absorb_counter = UNLINKS_PER_ABSORB; foreach(lc, pendingUnlinks) { @@ -226,9 +408,12 @@ SyncPostCheckpoint(void) if (entry->cycle_ctr == checkpoint_cycle_ctr) break; + Assert(entry->tag.handler >= 0 && + entry->tag.handler < NSyncHandlers); + /* Unlink the file */ - if (syncsw[entry->tag.handler].sync_unlinkfiletag(&entry->tag, - path) < 0) + if (ops[entry->tag.handler].sync_unlinkfiletag(&entry->tag, + path) < 0) { /* * There's a race condition, when the database is dropped at the @@ -300,6 +485,9 @@ ProcessSyncRequests(void) uint64 longest = 0; uint64 total_elapsed = 0; + /* See comment in SyncPostCheckpoint() above. */ + SyncOps *ops = syncsw; + /* * This is only called during checkpoints, and checkpoints should only * occur in processes that have created a pendingOps. @@ -411,9 +599,12 @@ ProcessSyncRequests(void) { char path[MAXPGPATH]; + Assert(entry->tag.handler >= 0 && + entry->tag.handler < NSyncHandlers); + INSTR_TIME_SET_CURRENT(sync_start); - if (syncsw[entry->tag.handler].sync_syncfiletag(&entry->tag, - path) == 0) + if (ops[entry->tag.handler].sync_syncfiletag(&entry->tag, + path) == 0) { /* Success; update statistics about sync timing */ INSTR_TIME_SET_CURRENT(sync_end); @@ -505,13 +696,24 @@ RememberSyncRequest(const FileTag *ftag, SyncRequestType type) HASH_SEQ_STATUS hstat; PendingFsyncEntry *pfe; ListCell *cell; + bool (*filetagmatches) (const FileTag *ftag, + const FileTag *candidate); + + Assert(ftag->handler >= 0 && ftag->handler < NSyncHandlers); + + /* + * Cache the per-handler filetagmatches function pointer once so both + * match loops keep it in a register. See comment in + * SyncPostCheckpoint(). + */ + filetagmatches = syncsw[ftag->handler].sync_filetagmatches; /* Cancel matching fsync requests */ hash_seq_init(&hstat, pendingOps); while ((pfe = (PendingFsyncEntry *) hash_seq_search(&hstat)) != NULL) { if (pfe->tag.handler == ftag->handler && - syncsw[ftag->handler].sync_filetagmatches(ftag, &pfe->tag)) + filetagmatches(ftag, &pfe->tag)) pfe->canceled = true; } @@ -521,7 +723,7 @@ RememberSyncRequest(const FileTag *ftag, SyncRequestType type) PendingUnlinkEntry *pue = (PendingUnlinkEntry *) lfirst(cell); if (pue->tag.handler == ftag->handler && - syncsw[ftag->handler].sync_filetagmatches(ftag, &pue->tag)) + filetagmatches(ftag, &pue->tag)) pue->canceled = true; } } diff --git a/src/include/storage/sync.h b/src/include/storage/sync.h index c2272d1417585..ada037d10b595 100644 --- a/src/include/storage/sync.h +++ b/src/include/storage/sync.h @@ -29,8 +29,13 @@ typedef enum SyncRequestType } SyncRequestType; /* - * Which set of functions to use to handle a given request. The values of - * the enumerators must match the indexes of the function table in sync.c. + * Which set of functions to use to handle a given request. Built-in + * handlers occupy the fixed enum values below; extensions register + * additional handlers via register_sync_handler() during + * shared_preload_libraries initialization and receive IDs starting + * at SYNC_HANDLER_FIRST_DYNAMIC. The values of the built-in + * enumerators must match the order in which InitSync() pre-registers + * the corresponding SyncOps structs in sync.c. */ typedef enum SyncRequestHandler { @@ -39,9 +44,19 @@ typedef enum SyncRequestHandler SYNC_HANDLER_COMMIT_TS, SYNC_HANDLER_MULTIXACT_OFFSET, SYNC_HANDLER_MULTIXACT_MEMBER, - SYNC_HANDLER_NONE, + + /* Extensions' dynamic handler IDs start here. */ + SYNC_HANDLER_FIRST_DYNAMIC, + + /* + * Sentinel for "no handler": fits in int16, outside the valid ID range so + * it cannot be confused with any registered handler. + */ + SYNC_HANDLER_NONE = -1, } SyncRequestHandler; +#define SYNC_HANDLER_MAX INT16_MAX + /* * A tag identifying a file. Currently it has the members required for md.c's * usage, but sync.c has no knowledge of the internal structure, and it is @@ -55,6 +70,25 @@ typedef struct FileTag uint64 segno; } FileTag; +/* + * Dispatch table entry for a sync handler. Public so extensions can + * define their own SyncOps and pass them to register_sync_handler(). + * + * sync_syncfiletag is required. sync_unlinkfiletag and + * sync_filetagmatches may be NULL if the handler does not support + * SYNC_UNLINK_REQUEST or SYNC_FILTER_REQUEST respectively, matching + * the pattern of the built-in CLOG/commit_ts/multixact handlers which + * only define sync_syncfiletag. + */ +typedef struct SyncOps +{ + int (*sync_syncfiletag) (const FileTag *ftag, char *path); + int (*sync_unlinkfiletag) (const FileTag *ftag, char *path); + bool (*sync_filetagmatches) (const FileTag *ftag, + const FileTag *candidate); +} SyncOps; + +extern void InitSyncHandlers(void); extern void InitSync(void); extern void SyncPreCheckpoint(void); extern void SyncPostCheckpoint(void); @@ -63,4 +97,28 @@ extern void RememberSyncRequest(const FileTag *ftag, SyncRequestType type); extern bool RegisterSyncRequest(const FileTag *ftag, SyncRequestType type, bool retryOnError); +/* + * Register a custom sync handler. Returns the assigned handler ID + * which the extension stores in FileTag.handler when queueing sync + * requests via RegisterSyncRequest(). + * + * MUST be called during shared_preload_libraries initialization + * (before process_shared_preload_libraries_done is set); later calls + * raise FATAL. `name` is used for error messages and is pstrdup'd + * into TopMemoryContext by the caller; callers do not need to keep + * the buffer alive. + * + * `ops->sync_syncfiletag` is required; the other two pointers may + * be NULL if the handler does not participate in SYNC_UNLINK_REQUEST + * or SYNC_FILTER_REQUEST flows. + * + * The returned ID is stable for the lifetime of the postmaster. + * Sync requests live only in the checkpointer's in-memory pendingOps + * hash table (they are not persisted across restarts), so there is + * no cross-restart stability requirement beyond the same + * shared_preload_libraries order that smgr_register() already relies + * on. + */ +extern int16 register_sync_handler(const SyncOps *ops, const char *name); + #endif /* SYNC_H */ From f506c343b0bd5591878fda571b7022311c64c09c Mon Sep 17 00:00:00 2001 From: Greg Lamberson Date: Thu, 21 May 2026 12:28:24 -0500 Subject: [PATCH 2/2] PG-2311: Add test module for sync handler registration test_sync_handler exercises register_sync_handler() from _PG_init() and verifies: - The registered handler ID is at least SYNC_HANDLER_FIRST_DYNAMIC. - Distinct FileTags produce distinct sync_syncfiletag callbacks at CHECKPOINT time. - Duplicate FileTags coalesce via HASH_BLOBS to a single dispatch. - Idle checkpoints do not re-dispatch already-processed entries (cycle_ctr skip). Shared state between the backend and the TAP harness lives in a small shmem area allocated via RequestAddinShmemSpace, accessed under an LWLock taken via RequestNamedLWLockTranche. The test SQL function triggers a CHECKPOINT and reports the observed dispatch counts back through psql, which the TAP test asserts against. Layout mirrors the existing fsync_checker test module from CF 5616 v5+. Discussion: https://postgr.es/m/IA1PR07MB983072521EE7FDEE98902534A9592@IA1PR07MB9830.namprd07.prod.outlook.com Jira: https://perconadev.atlassian.net/browse/PG-2311 --- src/test/modules/Makefile | 1 + src/test/modules/meson.build | 1 + src/test/modules/test_sync_handler/.gitignore | 4 + src/test/modules/test_sync_handler/Makefile | 27 +++ .../modules/test_sync_handler/meson.build | 33 ++++ .../modules/test_sync_handler/t/001_basic.pl | 96 +++++++++ .../test_sync_handler--1.0.sql | 13 ++ .../test_sync_handler/test_sync_handler.c | 187 ++++++++++++++++++ .../test_sync_handler.control | 4 + 9 files changed, 366 insertions(+) create mode 100644 src/test/modules/test_sync_handler/.gitignore create mode 100644 src/test/modules/test_sync_handler/Makefile create mode 100644 src/test/modules/test_sync_handler/meson.build create mode 100644 src/test/modules/test_sync_handler/t/001_basic.pl create mode 100644 src/test/modules/test_sync_handler/test_sync_handler--1.0.sql create mode 100644 src/test/modules/test_sync_handler/test_sync_handler.c create mode 100644 src/test/modules/test_sync_handler/test_sync_handler.control diff --git a/src/test/modules/Makefile b/src/test/modules/Makefile index 4e82d6f151732..4b691c47abe00 100644 --- a/src/test/modules/Makefile +++ b/src/test/modules/Makefile @@ -41,6 +41,7 @@ SUBDIRS = \ test_rls_hooks \ test_shm_mq \ test_slru \ + test_sync_handler \ test_tidstore \ unsafe_tests \ worker_spi \ diff --git a/src/test/modules/meson.build b/src/test/modules/meson.build index 9a957351ab693..5a418a9b81c4e 100644 --- a/src/test/modules/meson.build +++ b/src/test/modules/meson.build @@ -40,6 +40,7 @@ subdir('test_resowner') subdir('test_rls_hooks') subdir('test_shm_mq') subdir('test_slru') +subdir('test_sync_handler') subdir('test_tidstore') subdir('typcache') subdir('unsafe_tests') diff --git a/src/test/modules/test_sync_handler/.gitignore b/src/test/modules/test_sync_handler/.gitignore new file mode 100644 index 0000000000000..5dcb3ff972350 --- /dev/null +++ b/src/test/modules/test_sync_handler/.gitignore @@ -0,0 +1,4 @@ +# Generated subdirectories +/log/ +/results/ +/tmp_check/ diff --git a/src/test/modules/test_sync_handler/Makefile b/src/test/modules/test_sync_handler/Makefile new file mode 100644 index 0000000000000..22326a47e9cd6 --- /dev/null +++ b/src/test/modules/test_sync_handler/Makefile @@ -0,0 +1,27 @@ +# src/test/modules/test_sync_handler/Makefile + +MODULE_big = test_sync_handler +OBJS = \ + $(WIN32RES) \ + test_sync_handler.o +PGFILEDESC = "test_sync_handler - test module for sync handler registration" + +EXTENSION = test_sync_handler +DATA = test_sync_handler--1.0.sql + +TAP_TESTS = 1 + +# Tests require shared_preload_libraries=test_sync_handler which typical +# installcheck users do not have. Match test_slru's convention. +NO_INSTALLCHECK = 1 + +ifdef USE_PGXS +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) +else +subdir = src/test/modules/test_sync_handler +top_builddir = ../../../.. +include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif diff --git a/src/test/modules/test_sync_handler/meson.build b/src/test/modules/test_sync_handler/meson.build new file mode 100644 index 0000000000000..e7f03616ba0af --- /dev/null +++ b/src/test/modules/test_sync_handler/meson.build @@ -0,0 +1,33 @@ +# Copyright (c) 2026, PostgreSQL Global Development Group + +test_sync_handler_sources = files( + 'test_sync_handler.c', +) + +if host_system == 'windows' + test_sync_handler_sources += rc_lib_gen.process(win32ver_rc, extra_args: [ + '--NAME', 'test_sync_handler', + '--FILEDESC', 'test_sync_handler - test module for sync handler registration',]) +endif + +test_sync_handler = shared_module('test_sync_handler', + test_sync_handler_sources, + kwargs: pg_test_mod_args, +) +test_install_libs += test_sync_handler + +test_install_data += files( + 'test_sync_handler.control', + 'test_sync_handler--1.0.sql', +) + +tests += { + 'name': 'test_sync_handler', + 'sd': meson.current_source_dir(), + 'bd': meson.current_build_dir(), + 'tap': { + 'tests': [ + 't/001_basic.pl', + ], + }, +} diff --git a/src/test/modules/test_sync_handler/t/001_basic.pl b/src/test/modules/test_sync_handler/t/001_basic.pl new file mode 100644 index 0000000000000..29c0fc3c61ea9 --- /dev/null +++ b/src/test/modules/test_sync_handler/t/001_basic.pl @@ -0,0 +1,96 @@ +# Copyright (c) 2026, PostgreSQL Global Development Group +# +# Basic test for register_sync_handler() dispatch. +# +# Verifies that a custom sync handler registered via register_sync_handler() +# in _PG_init() receives callback invocations from ProcessSyncRequests() at +# CHECKPOINT time, that identical FileTags coalesce via HASH_BLOBS +# deduplication, that distinct FileTags produce distinct callbacks, and +# that an idle checkpoint does not re-dispatch entries that were already +# processed (cycle_ctr skip). + +use strict; +use warnings FATAL => 'all'; + +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +my $node = PostgreSQL::Test::Cluster->new('sync_handler'); +$node->init; +$node->append_conf( + 'postgresql.conf', q{ +shared_preload_libraries = 'test_sync_handler' +# TAP clusters set fsync = off by default for speed; re-enable here so +# that ProcessSyncRequests actually dispatches our sync handler callback. +fsync = on +}); +$node->start; +$node->safe_psql('postgres', 'CREATE EXTENSION test_sync_handler'); + +# The handler ID must be >= SYNC_HANDLER_FIRST_DYNAMIC. Built-ins +# currently occupy IDs 0..4, so the first extension handler should be +# at least 5. +my $id = $node->safe_psql('postgres', 'SELECT test_sync_handler_id()'); +ok($id >= 5, + "handler id $id is >= SYNC_HANDLER_FIRST_DYNAMIC (built-ins = 5)") + or diag("got id=$id"); + +# Baseline: no dispatches before we queue anything. +my $baseline = + $node->safe_psql('postgres', 'SELECT test_sync_handler_count()'); +is($baseline, '0', 'baseline dispatch count is zero'); + +# Queue 5 distinct FileTags (differing in segno only) and checkpoint. +# Expect 5 callback invocations since they are all distinct hash keys. +$node->safe_psql( + 'postgres', q{ +SELECT test_sync_handler_register(1); +SELECT test_sync_handler_register(2); +SELECT test_sync_handler_register(3); +SELECT test_sync_handler_register(4); +SELECT test_sync_handler_register(5); +}); +$node->safe_psql('postgres', 'CHECKPOINT'); +my $after_distinct = + $node->safe_psql('postgres', 'SELECT test_sync_handler_count()'); +is($after_distinct, '5', + '5 distinct FileTags produce 5 sync_syncfiletag callbacks') + or diag("got $after_distinct"); + +# Queue 10 duplicate FileTags (same segno 42) and checkpoint. +# Expect exactly 1 additional callback because pendingOps uses HASH_BLOBS +# and collapses identical FileTags into a single hash entry. +$node->safe_psql( + 'postgres', q{ +SELECT test_sync_handler_register(42); +SELECT test_sync_handler_register(42); +SELECT test_sync_handler_register(42); +SELECT test_sync_handler_register(42); +SELECT test_sync_handler_register(42); +SELECT test_sync_handler_register(42); +SELECT test_sync_handler_register(42); +SELECT test_sync_handler_register(42); +SELECT test_sync_handler_register(42); +SELECT test_sync_handler_register(42); +}); +$node->safe_psql('postgres', 'CHECKPOINT'); +my $after_coalesce = + $node->safe_psql('postgres', 'SELECT test_sync_handler_count()'); +is($after_coalesce, '6', + '10 duplicate FileTags coalesce via HASH_BLOBS to 1 additional callback') + or diag("got $after_coalesce"); + +# Second CHECKPOINT with no new requests. The count must stay the same: +# every entry from the previous checkpoint was processed and removed +# from pendingOps, and no new entries have been queued, so +# ProcessSyncRequests has nothing to dispatch. +$node->safe_psql('postgres', 'CHECKPOINT'); +my $after_idle = + $node->safe_psql('postgres', 'SELECT test_sync_handler_count()'); +is($after_idle, '6', 'idle checkpoint does not re-dispatch') + or diag("got $after_idle"); + +$node->stop; + +done_testing(); diff --git a/src/test/modules/test_sync_handler/test_sync_handler--1.0.sql b/src/test/modules/test_sync_handler/test_sync_handler--1.0.sql new file mode 100644 index 0000000000000..07ea297f15fc4 --- /dev/null +++ b/src/test/modules/test_sync_handler/test_sync_handler--1.0.sql @@ -0,0 +1,13 @@ +/* src/test/modules/test_sync_handler/test_sync_handler--1.0.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION test_sync_handler" to load this file. \quit + +CREATE FUNCTION test_sync_handler_id() RETURNS int4 + AS 'MODULE_PATHNAME', 'test_sync_handler_id' LANGUAGE C STRICT; + +CREATE FUNCTION test_sync_handler_register(seg bigint) RETURNS void + AS 'MODULE_PATHNAME', 'test_sync_handler_register' LANGUAGE C STRICT; + +CREATE FUNCTION test_sync_handler_count() RETURNS bigint + AS 'MODULE_PATHNAME', 'test_sync_handler_count' LANGUAGE C STRICT; diff --git a/src/test/modules/test_sync_handler/test_sync_handler.c b/src/test/modules/test_sync_handler/test_sync_handler.c new file mode 100644 index 0000000000000..b2cd25cc18daf --- /dev/null +++ b/src/test/modules/test_sync_handler/test_sync_handler.c @@ -0,0 +1,187 @@ +/*-------------------------------------------------------------------------- + * + * test_sync_handler.c + * Minimal extension exercising register_sync_handler() + dispatch. + * + * This module demonstrates the sync.c extensibility API by registering a + * trivial SyncOps during _PG_init(), exposing SQL-callable helpers to + * queue FileTags for the registered handler, and tracking how many times + * the handler's sync_syncfiletag callback is invoked. + * + * Because sync_syncfiletag runs in the checkpointer process but + * test_sync_handler_count() runs in a regular backend, the call counter + * lives in shared memory via GetNamedDSMSegment(). + * + * The TAP test in t/001_basic.pl uses this module to verify: + * - register_sync_handler() returns an ID >= SYNC_HANDLER_FIRST_DYNAMIC + * - Queued FileTags round-trip through the checkpointer and land in + * the registered sync_syncfiletag callback at CHECKPOINT time + * - Identical FileTags coalesce via HASH_BLOBS deduplication in + * pendingOps (N duplicates -> 1 callback) + * - Distinct FileTags produce distinct callbacks + * - Idle checkpoints do not re-dispatch (cycle_ctr skip) + * + * Copyright (c) 2026, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/test/modules/test_sync_handler/test_sync_handler.c + * + *-------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "fmgr.h" +#include "miscadmin.h" +#include "pg_config.h" +#include "port/atomics.h" +#include "storage/dsm_registry.h" +#include "storage/sync.h" +#include "utils/builtins.h" + +PG_MODULE_MAGIC; + +void _PG_init(void); + +typedef struct TshSharedState +{ + pg_atomic_uint64 call_count; +} TshSharedState; + +static int16 tsh_handler_id = -1; +static TshSharedState *tsh_shared = NULL; + +/* + * GetNamedDSMSegment's init_callback signature gained an extra `arg` + * parameter in PG 19devel. Provide both shapes so the test module is + * buildable across 18 and 19. + */ +#if PG_VERSION_NUM >= 190000 +static void +tsh_init_shmem(void *ptr, void *arg) +#else +static void +tsh_init_shmem(void *ptr) +#endif +{ + TshSharedState *state = (TshSharedState *) ptr; + + pg_atomic_init_u64(&state->call_count, 0); +} + +static void +tsh_attach_shmem(void) +{ + bool found; + + if (tsh_shared != NULL) + return; +#if PG_VERSION_NUM >= 190000 + tsh_shared = GetNamedDSMSegment("test_sync_handler", + sizeof(TshSharedState), + tsh_init_shmem, + &found, + NULL); +#else + tsh_shared = GetNamedDSMSegment("test_sync_handler", + sizeof(TshSharedState), + tsh_init_shmem, + &found); +#endif +} + +static int +test_sync_syncfiletag(const FileTag *ftag, char *path) +{ + /* + * This runs in the checkpointer process. Attach to the shared memory + * segment the first time we're called so that counter increments are + * visible to the backend that queries test_sync_handler_count(). + */ + tsh_attach_shmem(); + pg_atomic_fetch_add_u64(&tsh_shared->call_count, 1); + + if (path != NULL) + snprintf(path, MAXPGPATH, "test_sync_handler:seg%llu", + (unsigned long long) ftag->segno); + return 0; +} + +static int +test_sync_unlinkfiletag(const FileTag *ftag, char *path) +{ + if (path != NULL) + snprintf(path, MAXPGPATH, "test_sync_handler:unlink"); + return 0; +} + +static bool +test_sync_filetagmatches(const FileTag *ftag, const FileTag *candidate) +{ + /* + * Match on dbOid, mirroring md.c's DROP DATABASE semantics. The test + * doesn't exercise the filter path today, but the callback is defined so + * extensions can use this module as a copy-paste starting point. + */ + return ftag->rlocator.dbOid == candidate->rlocator.dbOid; +} + +static const SyncOps test_sync_ops = { + .sync_syncfiletag = test_sync_syncfiletag, + .sync_unlinkfiletag = test_sync_unlinkfiletag, + .sync_filetagmatches = test_sync_filetagmatches, +}; + +void +_PG_init(void) +{ + tsh_handler_id = register_sync_handler(&test_sync_ops, "test_sync_handler"); + elog(LOG, "test_sync_handler: registered as id %d", + (int) tsh_handler_id); +} + +PG_FUNCTION_INFO_V1(test_sync_handler_id); +Datum +test_sync_handler_id(PG_FUNCTION_ARGS) +{ + PG_RETURN_INT32((int32) tsh_handler_id); +} + +PG_FUNCTION_INFO_V1(test_sync_handler_register); +Datum +test_sync_handler_register(PG_FUNCTION_ARGS) +{ + int64 seg = PG_GETARG_INT64(0); + FileTag tag; + + if (tsh_handler_id < 0) + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("sync handler was not registered during module initialization"))); + + /* + * Mandatory memset: pendingOps uses HASH_BLOBS which hashes every byte of + * the FileTag. Uninitialized padding would break coalescing. + */ + memset(&tag, 0, sizeof(FileTag)); + tag.handler = tsh_handler_id; + tag.forknum = 0; + tag.rlocator.spcOid = 1; + tag.rlocator.dbOid = MyDatabaseId; + tag.rlocator.relNumber = 1; + tag.segno = (uint64) seg; + + if (!RegisterSyncRequest(&tag, SYNC_REQUEST, true /* retryOnError */ )) + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("could not register sync request"))); + + PG_RETURN_VOID(); +} + +PG_FUNCTION_INFO_V1(test_sync_handler_count); +Datum +test_sync_handler_count(PG_FUNCTION_ARGS) +{ + tsh_attach_shmem(); + PG_RETURN_INT64((int64) pg_atomic_read_u64(&tsh_shared->call_count)); +} diff --git a/src/test/modules/test_sync_handler/test_sync_handler.control b/src/test/modules/test_sync_handler/test_sync_handler.control new file mode 100644 index 0000000000000..3d528f7a8664c --- /dev/null +++ b/src/test/modules/test_sync_handler/test_sync_handler.control @@ -0,0 +1,4 @@ +comment = 'Test module for sync handler registration' +default_version = '1.0' +module_pathname = '$libdir/test_sync_handler' +relocatable = true