Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion Dockerfile-orioledb-17
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,11 @@ RUN sed -i 's/ timescaledb,//g;' "/etc/postgresql/postgresql.conf" && \

# OrioleDB configuration: preload the extension, make it the default table access method, and enable rewind
RUN sed -i 's/\(shared_preload_libraries.*\)'\''\(.*\)$/\1, orioledb'\''\2/' "/etc/postgresql/postgresql.conf" && \
echo "default_table_access_method = 'orioledb'" >> "/etc/postgresql/postgresql.conf"
echo "default_table_access_method = 'orioledb'" >> "/etc/postgresql/postgresql.conf" && \
echo "orioledb.enable_rewind = true" >> "/etc/postgresql/postgresql.conf" && \
echo "orioledb.rewind_max_time = 1200" >> "/etc/postgresql/postgresql.conf" && \
echo "orioledb.rewind_max_transactions = 100000" >> "/etc/postgresql/postgresql.conf" && \
echo "orioledb.rewind_buffers = 1280" >> "/etc/postgresql/postgresql.conf"

# Include schema migrations
COPY migrations/db /docker-entrypoint-initdb.d/
Expand Down
32 changes: 32 additions & 0 deletions ansible/tasks/stage2-setup-postgres.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,38 @@
path: '/etc/postgresql/postgresql.conf'
state: 'present'

# Turn on OrioleDB rewind in postgresql.conf (OrioleDB + stage-2 Nix builds only).
# `regexp` lets the task replace an existing orioledb.enable_rewind assignment in
# place; without it, changing the value later would append a second, conflicting line.
- name: Enable OrioleDB rewind feature
  ansible.builtin.lineinfile:
    path: /etc/postgresql/postgresql.conf
    regexp: '^\s*orioledb\.enable_rewind\s*='
    line: "orioledb.enable_rewind = true"
    state: present
  when: is_psql_oriole and stage2_nix
  become: yes

# Cap how far back a rewind may go: 1200, i.e. the 20 minutes named in the task title.
# `regexp` lets the task replace an existing orioledb.rewind_max_time assignment in
# place; without it, changing the value later would append a second, conflicting line.
- name: Set OrioleDB rewind max time (20 minutes)
  ansible.builtin.lineinfile:
    path: /etc/postgresql/postgresql.conf
    regexp: '^\s*orioledb\.rewind_max_time\s*='
    line: "orioledb.rewind_max_time = 1200"
    state: present
  when: is_psql_oriole and stage2_nix
  become: yes

# Set orioledb.rewind_max_transactions to 100000 (OrioleDB + stage-2 Nix builds only).
# `regexp` lets the task replace an existing orioledb.rewind_max_transactions assignment
# in place; without it, changing the value later would append a second, conflicting line.
- name: Set OrioleDB rewind max transactions
  ansible.builtin.lineinfile:
    path: /etc/postgresql/postgresql.conf
    regexp: '^\s*orioledb\.rewind_max_transactions\s*='
    line: "orioledb.rewind_max_transactions = 100000"
    state: present
  when: is_psql_oriole and stage2_nix
  become: yes

# Size the rewind buffer pool: 1280 buffers, i.e. the 10MB named in the task title.
# `regexp` lets the task replace an existing orioledb.rewind_buffers assignment in
# place; without it, changing the value later would append a second, conflicting line.
- name: Set OrioleDB rewind buffers (1280 buffers = 10MB)
  ansible.builtin.lineinfile:
    path: /etc/postgresql/postgresql.conf
    regexp: '^\s*orioledb\.rewind_buffers\s*='
    line: "orioledb.rewind_buffers = 1280"
    state: present
  when: is_psql_oriole and stage2_nix
  become: yes

- name: Add ORIOLEDB_ENABLED environment variable
ansible.builtin.lineinfile:
line: 'ORIOLEDB_ENABLED=true'
Expand Down
6 changes: 3 additions & 3 deletions ansible/vars.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@ postgres_major:

# Full version strings for each major version
postgres_release:
postgresorioledb-17: "17.6.0.050-orioledb"
postgres17: "17.6.1.093"
postgres15: "15.14.1.093"
postgresorioledb-17: "17.6.0.045-orioledb-rel-3"
postgres17: "17.6.1.088-rel-3"
postgres15: "15.14.1.088-rel-3"

# Non Postgres Extensions
pgbouncer_release: 1.25.1
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
-- migrate:up
-- Allow the postgres role to call the OrioleDB rewind functions.
-- Databases where the orioledb extension is not installed are left untouched.
do $$
begin
  -- Nothing to grant when OrioleDB is absent from this database.
  if not exists (select 1 from pg_extension where extname = 'orioledb') then
    return;
  end if;

  grant execute on function extensions.orioledb_rewind_by_time(int) to postgres;
  grant execute on function extensions.orioledb_rewind_to_transaction(int, bigint) to postgres;
  grant execute on function extensions.orioledb_rewind_to_timestamp(timestamptz) to postgres;
end $$;

-- migrate:down
301 changes: 301 additions & 0 deletions nix/ext/orioledb-remove-restart-on-rewind.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,301 @@
diff --git a/include/rewind/rewind.h b/include/rewind/rewind.h
index 354e9f82..4a4c080f 100644
--- a/include/rewind/rewind.h
+++ b/include/rewind/rewind.h
@@ -43,9 +43,6 @@ extern void log_print_rewind_queue(void);
#define SUBXIDS_PER_ITEM (25)


-#define PG_CTL_CMD_LEN (8) /* Actually we only need 4 extra chars */
-#define PG_CTL_MAX_CMD_LEN (MAXPGPATH + PG_CTL_CMD_LEN)
-
/* RewindItem and SubxidsItem should have same size to be castable to each other */
/* Empty RewindItem and SubxidsItem have invalid oxid and tag */
typedef struct RewindItem
diff --git a/src/orioledb.c b/src/orioledb.c
index 872d56c0..ebdd03b4 100644
--- a/src/orioledb.c
+++ b/src/orioledb.c
@@ -270,32 +270,6 @@ static RmgrData rmgr =
.rm_decode = orioledb_decode
};

-/*
- * We currently do not support restarting PG instance from within the extension
- * on certain systems. Refuse to enable rewind on those systems.
- */
-static bool
-orioledb_enable_rewind_check_hook(bool *newval, void **extra, GucSource source)
-{
-#if defined(WIN32)
- if (*newval)
- {
- GUC_check_errcode(ERRCODE_FEATURE_NOT_SUPPORTED);
- GUC_check_errdetail("Rewind is not supported on Windows.");
- return false;
- }
-#elif !defined(HAVE_SETSID)
- if (*newval)
- {
- GUC_check_errcode(ERRCODE_FEATURE_NOT_SUPPORTED);
- GUC_check_errdetail("Rewind is not supported on systems without setsid(2).");
- return false;
- }
-#endif
- /* Supported system or newval == false */
- return true;
-}
-
void
_PG_init(void)
{
@@ -853,7 +827,7 @@ _PG_init(void)
false,
PGC_POSTMASTER,
0,
- orioledb_enable_rewind_check_hook,
+ NULL,
NULL,
NULL);

diff --git a/src/rewind/rewind.c b/src/rewind/rewind.c
index 606f0d3b..987279e4 100644
--- a/src/rewind/rewind.c
+++ b/src/rewind/rewind.c
@@ -30,11 +30,9 @@
#include "postmaster/bgwriter.h"
#include "postmaster/autovacuum.h"
#include "storage/bufmgr.h"
-#include "storage/ipc.h"
#include "storage/latch.h"
#include "storage/proc.h"
#include "storage/sinvaladt.h"
-#include "utils/elog.h"
#include "utils/memutils.h"
#include "utils/snapmgr.h"
#include "utils/syscache.h"
@@ -81,9 +79,6 @@ PG_FUNCTION_INFO_V1(orioledb_rewind_set_complete);
#define REWIND_MODE_XID (3)

static void orioledb_rewind_internal(int rewind_mode, int rewind_time, OXid rewind_oxid, TransactionId rewind_xid, TimestampTz rewind_timestamp);
-static void try_restart_pg(void);
-static void cleanup_fds(void);
-static void bootstrap_signals(void);

/* Interface functions */

@@ -761,26 +756,45 @@ orioledb_rewind_internal(int rewind_mode, int rewind_time, OXid rewind_oxid, Tra
rewindStartTimeStamp = GetCurrentTimestamp();
elog(LOG, "Rewind started");

- /* Terminate all other backends */
+ /*
+ * Block all signals so that the SIGTERM the postmaster will send to us
+ * during fast shutdown cannot interrupt do_rewind mid-execution.
+ */
+ sigset_t blocked_sigset,
+ old_sigset;
+
+ sigfillset(&blocked_sigset);
+ sigprocmask(SIG_BLOCK, &blocked_sigset, &old_sigset);
+
+ /*
+ * Fast shutdown (SIGINT) tells the postmaster to kill existing backends
+ * and stop accepting new connections. Unlike smart shutdown (SIGTERM),
+ * this prevents a connection pooler from reconnecting killed backends,
+ * so the wait loop below will actually drain.
+ */
+ (void) kill(PostmasterPid, SIGINT);
+
+ /*
+ * Wait for other backends to exit. Fast shutdown prevents new
+ * connections, so this loop terminates — unlike the original
+ * TerminateOtherDBBackends approach where poolers could refill the pool.
+ *
+ * We time out after 30 s (300 × 100 ms) because without a ceiling an
+ * uninterruptible backend or a lock-deadlock on rewindEvictLock could
+ * cause us to hang indefinitely while holding the exclusive lock.
+ */
retry = 0;
- TerminateOtherDBBackends(InvalidOid);
while (CountOtherDBBackends(InvalidOid, &nbackends, &nprepared))
{
if (AutoVacuumingActive() && nbackends <= 1)
break;

- elog(WARNING, "%u backends left", nbackends);
- pg_usleep(1000000L);
+ pg_usleep(100000L);
retry++;
- if (retry >= 100)
-
- /*
- * Rewind worker already stopped and come transactions could be
- * not in the buffer so we can't continue.
- */
+ if (retry >= 300)
ereport(FATAL,
(errcode(ERRCODE_INTERNAL_ERROR),
- errmsg("Backends couldn't stop in 100s, aborting rewind")));
+ errmsg("Backends couldn't stop in 30s during fast shutdown, aborting rewind")));
}

rewindMeta->addToRewindQueueDisabled = true;
@@ -791,7 +805,11 @@ orioledb_rewind_internal(int rewind_mode, int rewind_time, OXid rewind_oxid, Tra
LWLockRelease(&rewindMeta->rewindEvictLock);
elog(LOG, "Rewind complete");

- try_restart_pg();
+ /*
+ * Restore signal mask. The pending SIGTERM from the postmaster's fast
+ * shutdown is delivered here and our backend exits cleanly.
+ */
+ sigprocmask(SIG_SETMASK, &old_sigset, NULL);
}

TransactionId
@@ -1693,144 +1711,3 @@ get_rewind_run_xmin(void)
{
return pg_atomic_read_u64(&rewindMeta->runXmin);
}
-
-
-/*
- * try_restart_pg
- *
- * Attempt to restart the PostgreSQL instance.
- *
- * This function spawns a persistent, detached process that executes
- * "pg_ctl restart". Detachment ensures the child process survives
- * once the postmaster exits.
- *
- * dependency: This implementation relies on fork(2) and setsid(2). Systems
- * lacking these primitives (e.g. Windows), are not supported, so we can't end
- * up here. See orioledb_enable_rewind_check_hook for details.
- */
-static void
-try_restart_pg(void)
-{
- pid_t pid;
- sigset_t blocked_sigset,
- old_sigset;
-
- /* Block all signals until we're done */
- sigfillset(&blocked_sigset);
- sigprocmask(SIG_BLOCK, &blocked_sigset, &old_sigset);
-
- /* Flush stdio channels just before fork, to avoid double-output problems */
- fflush(NULL);
-
- pid = fork();
-
- if (pid < 0)
- {
- /* fork failed, restore signals and bail */
- sigprocmask(SIG_SETMASK, &old_sigset, NULL);
-
- elog(DEBUG3, "fork failed while attempting to restart,"
- " stopping instead");
-
- ereport(WARNING,
- (errmsg("could not restart instance"),
- errdetail("Sending shutdown request to postmaster.")));
- (void) kill(PostmasterPid, SIGTERM);
- return;
- }
-
- if (pid > 0)
- {
- /* fork successful, in parent. Restore signals and return to client */
- sigprocmask(SIG_SETMASK, &old_sigset, NULL);
- ereport(NOTICE,
- (errmsg("attempting to restart database system")));
- return;
- }
-
- /* fork succeeded, in child */
- if (setsid() < 0)
- {
- /* setsid failed: */
- goto emergency_shutdown;
- }
-
- {
- /* We're in a session that will survive when the parent goes away */
- sigset_t empty_mask;
- char bindir[MAXPGPATH];
- char cmd[PG_CTL_MAX_CMD_LEN];
- char *lastslash;
-
- strlcpy(bindir, my_exec_path, MAXPGPATH);
-
- lastslash = strrchr(bindir, '/');
-
- /* if for some reason we got a bogus bindir */
- if (lastslash == NULL)
- goto emergency_shutdown;
-
- *lastslash = '\0';
- snprintf(cmd, sizeof(cmd), "%s/pg_ctl", bindir);
-
- /* Do a little dance with fds to make logger shutdown cleanly */
- cleanup_fds();
- /* Sleep 0.5s to let the parent return */
- pg_usleep(500000);
- /* Be paranoid: restore default signal handlers and mask before execl */
- bootstrap_signals();
- sigemptyset(&empty_mask);
- sigprocmask(SIG_SETMASK, &empty_mask, NULL);
-
- execl(cmd, cmd, "restart", "-D", DataDir, (char *) NULL);
- /* execl failed: */
- goto emergency_shutdown;
- }
-
-emergency_shutdown:
-
- /*
- * If we got here, either execl or setsid failed. We can't just bail
- * because logging isn't safe since we can't guarantee allocating memory
- * won't result in a deadlock. Additionally we might've already cleaned up
- * the FDs so elog won't reach the logger anyway. We also can't really
- * proc_exit() or even exit(): Both will result in a crash while executing
- * on-exit callbacks. So we request a shutdown and _exit(). 71 exit code
- * is "system error". We don't want to put too much effort into
- * investigating here.
- */
- (void) kill(PostmasterPid, SIGTERM);
- _exit(71);
-}
-
-static void
-cleanup_fds(void)
-{
- int devnull = open("/dev/null", O_RDWR);
-
- if (devnull >= 0)
- {
- /*
- * We can't just close stderr/stdout fds, so redirect them to
- * /dev/null instead
- */
- dup2(devnull, fileno(stdin));
- dup2(devnull, fileno(stderr));
- dup2(devnull, fileno(stdout));
-
- /* Be paranoid: we don't want to ever close stdin/stderr/stdout */
- if (devnull > fileno(stdin) && devnull > fileno(stdout)
- && devnull > fileno(stderr))
- close(devnull);
- }
-}
-
-static void
-bootstrap_signals(void)
-{
- pqsignal(SIGHUP, SIG_DFL);
- pqsignal(SIGPIPE, SIG_DFL);
- pqsignal(SIGINT, SIG_DFL);
- pqsignal(SIGTERM, SIG_DFL);
- pqsignal(SIGQUIT, SIG_DFL);
-}
3 changes: 3 additions & 0 deletions nix/ext/orioledb.nix
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ stdenv.mkDerivation rec {
sha256 = "sha256-Vz3vfmTGRW+O9aXZxqixHC2CpqZJf/1UCQWoENbAak4=";
};
version = "982e11ae62c9e00c0d74f9f8de31d99ff383fd02";
patches = [
./orioledb-remove-restart-on-rewind.patch
];
buildInputs = [
curl
libkrb5
Expand Down
4 changes: 4 additions & 0 deletions nix/ext/tests/lib.nix
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,10 @@ let
# OrioleDB: append orioledb to shared_preload_libraries, make it the default table access method, and enable rewind
sed -i "s/\(shared_preload_libraries.*\)'\(.*\)$/\1, orioledb'\2/" $out/postgresql.conf
echo "default_table_access_method = 'orioledb'" >> $out/postgresql.conf
echo "orioledb.enable_rewind = true" >> $out/postgresql.conf
echo "orioledb.rewind_max_time = 1200" >> $out/postgresql.conf
echo "orioledb.rewind_max_transactions = 100000" >> $out/postgresql.conf
echo "orioledb.rewind_buffers = 1280" >> $out/postgresql.conf
''
else
""
Expand Down
Loading
Loading