From 4335990e7f04a2cc80c4a98c90b28dd95cafb0d1 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Wed, 8 Feb 2023 13:09:27 +0900 Subject: [PATCH 01/61] Make EXEC_BACKEND more convenient on Linux and FreeBSD. Try to disable ASLR when building in EXEC_BACKEND mode, to avoid random memory mapping failures while testing. For developer use only, no effect on regular builds. This has been originally applied as of f3e7806 for v15~, but recently-added buildfarm member gokiburi tests this configuration on older branches as well, causing it to fail randomly as ASLR would be enabled. Suggested-by: Andres Freund Tested-by: Bossart, Nathan Discussion: https://postgr.es/m/20210806032944.m4tz7j2w47mant26%40alap3.anarazel.de Backpatch-through: 12 --- configure | 2 +- configure.ac | 1 + src/bin/pg_ctl/pg_ctl.c | 4 ++++ src/common/exec.c | 33 +++++++++++++++++++++++++++++++++ src/include/pg_config.h.in | 3 +++ src/include/port.h | 5 +++++ src/test/regress/pg_regress.c | 4 ++++ src/tools/msvc/Solution.pm | 1 + 8 files changed, 52 insertions(+), 1 deletion(-) diff --git a/configure b/configure index 9c946381f6b..3228096e38e 100755 --- a/configure +++ b/configure @@ -16895,7 +16895,7 @@ $as_echo "#define HAVE_STDBOOL_H 1" >>confdefs.h fi -for ac_header in atomic.h copyfile.h execinfo.h getopt.h ifaddrs.h langinfo.h mbarrier.h poll.h sys/epoll.h sys/event.h sys/ipc.h sys/prctl.h sys/procctl.h sys/pstat.h sys/resource.h sys/select.h sys/sem.h sys/shm.h sys/signalfd.h sys/sockio.h sys/tas.h sys/uio.h sys/un.h termios.h ucred.h wctype.h +for ac_header in atomic.h copyfile.h execinfo.h getopt.h ifaddrs.h langinfo.h mbarrier.h poll.h sys/epoll.h sys/event.h sys/ipc.h sys/personality.h sys/prctl.h sys/procctl.h sys/pstat.h sys/resource.h sys/select.h sys/sem.h sys/shm.h sys/signalfd.h sys/sockio.h sys/tas.h sys/uio.h sys/un.h termios.h ucred.h wctype.h do : as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh` ac_fn_c_check_header_mongrel "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default" diff --git a/configure.ac b/configure.ac index bcc5cf6c6ab..71c27d57691 100644 --- a/configure.ac +++ b/configure.ac @@ -1904,6 +1904,7 @@ AC_CHECK_HEADERS(m4_normalize([ sys/epoll.h sys/event.h sys/ipc.h + sys/personality.h sys/prctl.h sys/procctl.h sys/pstat.h diff --git a/src/bin/pg_ctl/pg_ctl.c b/src/bin/pg_ctl/pg_ctl.c index cc733cb7be1..26d62e823cf 100644 --- a/src/bin/pg_ctl/pg_ctl.c +++ b/src/bin/pg_ctl/pg_ctl.c @@ -466,6 +466,10 @@ start_postmaster(void) fflush(stdout); fflush(stderr); +#ifdef EXEC_BACKEND + pg_disable_aslr(); +#endif + pm_pid = fork(); if (pm_pid < 0) { diff --git a/src/common/exec.c b/src/common/exec.c index 5159b616a39..dbac0598be0 100644 --- a/src/common/exec.c +++ b/src/common/exec.c @@ -27,6 +27,14 @@ #include "common/mdb_locale.h" +#ifdef EXEC_BACKEND +#if defined(HAVE_SYS_PERSONALITY_H) +#include +#elif defined(HAVE_SYS_PROCCTL_H) +#include +#endif +#endif + /* Inhibit mingw CRT's auto-globbing of command line arguments */ #if defined(WIN32) && !defined(_MSC_VER) extern int _CRT_glob = 0; /* 0 turns off globbing; 1 turns it on */ @@ -477,6 +485,31 @@ set_pglocale_pgservice(const char *argv0, const char *app) } } +#ifdef EXEC_BACKEND +/* + * For the benefit of PostgreSQL developers testing EXEC_BACKEND on Unix + * systems (code paths normally exercised only on Windows), provide a way to + * disable address space layout randomization, if we know how on this platform. + * Otherwise, backends may fail to attach to shared memory at the fixed address + * chosen by the postmaster. 
(See also the macOS-specific hack in + * sysv_shmem.c.) + */ +int +pg_disable_aslr(void) +{ +#if defined(HAVE_SYS_PERSONALITY_H) + return personality(ADDR_NO_RANDOMIZE); +#elif defined(HAVE_SYS_PROCCTL_H) && defined(PROC_ASLR_FORCE_DISABLE) + int data = PROC_ASLR_FORCE_DISABLE; + + return procctl(P_PID, 0, PROC_ASLR_CTL, &data); +#else + errno = ENOSYS; + return -1; +#endif +} +#endif + #ifdef WIN32 /* diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in index 54de6844f58..d02229909e9 100644 --- a/src/include/pg_config.h.in +++ b/src/include/pg_config.h.in @@ -699,6 +699,9 @@ /* Define to 1 if you have the header file. */ #undef HAVE_SYS_IPC_H +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_PERSONALITY_H + /* Define to 1 if you have the header file. */ #undef HAVE_SYS_PRCTL_H diff --git a/src/include/port.h b/src/include/port.h index 4179df36ebc..5aecc883f63 100644 --- a/src/include/port.h +++ b/src/include/port.h @@ -141,6 +141,11 @@ extern char *pipe_read_line(char *cmd, char *line, int maxsize); #define PG_VERSIONSTR "postgres (Apache Cloudberry) " PG_VERSION "\n" #define PG_BACKEND_VERSIONSTR "postgres (Apache Cloudberry) " PG_VERSION "\n" +#ifdef EXEC_BACKEND +/* Disable ASLR before exec, for developer builds only (in exec.c) */ +extern int pg_disable_aslr(void); +#endif + #if defined(WIN32) || defined(__CYGWIN__) #define EXE ".exe" diff --git a/src/test/regress/pg_regress.c b/src/test/regress/pg_regress.c index c479142222e..c9921aaf5ce 100644 --- a/src/test/regress/pg_regress.c +++ b/src/test/regress/pg_regress.c @@ -1625,6 +1625,10 @@ spawn_process(const char *cmdline) if (logfile) fflush(logfile); +#ifdef EXEC_BACKEND + pg_disable_aslr(); +#endif + pid = fork(); if (pid == -1) { diff --git a/src/tools/msvc/Solution.pm b/src/tools/msvc/Solution.pm index e7af4162759..b6393b41773 100644 --- a/src/tools/msvc/Solution.pm +++ b/src/tools/msvc/Solution.pm @@ -398,6 +398,7 @@ sub GenerateFiles HAVE_SYS_EPOLL_H => undef, HAVE_SYS_EVENT_H => undef, HAVE_SYS_IPC_H => undef, + HAVE_SYS_PERSONALITY_H => undef, HAVE_SYS_PRCTL_H => undef, HAVE_SYS_PROCCTL_H => undef, HAVE_SYS_PSTAT_H => undef, From 976fdea0fecf00c751e86deadc98446af02c2889 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Wed, 8 Feb 2023 17:15:23 -0500 Subject: [PATCH 02/61] Stop recommending auto-download of DTD files, and indeed disable it. It appears no longer possible to build the SGML docs without a local installation of the DocBook DTD, because sourceforge.net now only permits HTTPS access, and no common version of xsltproc supports that. Hence, remove the bits of our documentation suggesting that that's possible or useful. In fact, we might as well add the --nonet option to the build recipes automatically, for a bit of extra security. Also fix our documentation-tool-installation recipes for macOS to ensure that xmllint and xsltproc are pulled in from MacPorts or Homebrew. The previous recipes assumed you could use the Apple-supplied versions of these tools; which still works, except that you'd need to set an environment variable to ensure that they would find DTD files provided by those package managers. Simpler and easier to just recommend pulling in the additional packages. In HEAD, also document how to build docs using Meson, and adjust "ninja docs" to just build the HTML docs, for consistency with the default behavior of doc/src/sgml/Makefile. In a fit of neatnik-ism, I also made the ordering of the package lists match the order in which the tools are described at the head of the appendix. 
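For illustration only (the exact command lines are assembled from
Makefile variables), both tools are now always run with the option,
roughly:

    xmllint --nonet --path . ...
    xsltproc --nonet --path . ...

so the DTD and stylesheets must resolve through the local XML catalog
rather than being fetched over the network.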
Aleksander Alekseev, Peter Eisentraut, Tom Lane Discussion: https://postgr.es/m/CAJ7c6TO8Aro2nxg=EQsVGiSDe-TstP4EsSvDHd7DSRsP40PgGA@mail.gmail.com --- doc/src/sgml/Makefile | 8 ++++-- doc/src/sgml/docguide.sgml | 55 ++++++++++++++++++------------------ doc/src/sgml/images/Makefile | 2 +- 3 files changed, 34 insertions(+), 31 deletions(-) diff --git a/doc/src/sgml/Makefile b/doc/src/sgml/Makefile index d10f9e5b398..58ae606bccf 100644 --- a/doc/src/sgml/Makefile +++ b/doc/src/sgml/Makefile @@ -44,11 +44,15 @@ endif XMLINCLUDE = --path . -ifndef XMLLINT +ifdef XMLLINT +XMLLINT := $(XMLLINT) --nonet +else XMLLINT = $(missing) xmllint endif -ifndef XSLTPROC +ifdef XSLTPROC +XSLTPROC := $(XSLTPROC) --nonet +else XSLTPROC = $(missing) xsltproc endif diff --git a/doc/src/sgml/docguide.sgml b/doc/src/sgml/docguide.sgml index e1bac68604f..55ef6417749 100644 --- a/doc/src/sgml/docguide.sgml +++ b/doc/src/sgml/docguide.sgml @@ -136,6 +136,7 @@ This is a program for converting, among other things, XML to PDF. + It is needed only if you want to build the documentation in PDF format. @@ -151,25 +152,13 @@ here. - - You can get away with not installing DocBook XML and the DocBook XSLT - stylesheets locally, because the required files will be downloaded from the - Internet and cached locally. This may in fact be the preferred solution if - your operating system packages provide only an old version of these files, - or if no packages are available at all. - If you want to prevent any attempt to access the Internet while building - the documentation, you need to pass the option - to xmllint and xsltproc; see below - for an example. - - Installation on Fedora, RHEL, and Derivatives To install the required packages, use: -yum install docbook-dtds docbook-style-xsl fop libxslt +yum install docbook-dtds docbook-style-xsl libxslt fop @@ -180,7 +169,7 @@ yum install docbook-dtds docbook-style-xsl fop libxslt To install the required packages with pkg, use: -pkg install docbook-xml docbook-xsl fop libxslt +pkg install docbook-xml docbook-xsl libxslt fop @@ -199,7 +188,7 @@ pkg install docbook-xml docbook-xsl fop libxslt available for Debian GNU/Linux. To install, simply use: -apt-get install docbook-xml docbook-xsl fop libxml2-utils xsltproc +apt-get install docbook-xml docbook-xsl libxml2-utils xsltproc fop @@ -208,21 +197,37 @@ apt-get install docbook-xml docbook-xsl fop libxml2-utils xsltproc macOS - On macOS, you can build the HTML and man documentation without installing - anything extra. If you want to build PDFs or want to install a local copy - of DocBook, you can get those from your preferred package manager. + If you use MacPorts, the following will get you set up: + +sudo port install docbook-xml docbook-xsl-nons libxslt fop + + If you use Homebrew, use this: + +brew install docbook docbook-xsl libxslt fop + - If you use MacPorts, the following will get you set up: + The Homebrew-supplied programs require the following environment variable + to be set: -sudo port install docbook-xml-4.5 docbook-xsl fop +export XML_CATALOG_FILES=/usr/local/etc/xml/catalog - If you use Homebrew, use this: + Without it, xsltproc will throw errors like this: -brew install docbook docbook-xsl fop +I/O error : Attempt to load network entity http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd +postgres.sgml:21: warning: failed to load external entity "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" +... 
+ + + While it is possible to use the Apple-provided versions + of xmllint and xsltproc + instead of those from MacPorts or Homebrew, you'll still need + to install the DocBook DTD and stylesheets, and set up a catalog + file that points to them. + @@ -253,12 +258,6 @@ checking for dbtoepub... dbtoepub these programs, for example ./configure ... XMLLINT=/opt/local/bin/xmllint ... - - Also, if you want to ensure that xmllint - and xsltproc will not perform any network access, - you can do something like - -./configure ... XMLLINT="xmllint --nonet" XSLTPROC="xsltproc --nonet" ... diff --git a/doc/src/sgml/images/Makefile b/doc/src/sgml/images/Makefile index f9e356348b2..645519095d0 100644 --- a/doc/src/sgml/images/Makefile +++ b/doc/src/sgml/images/Makefile @@ -9,7 +9,7 @@ ALL_IMAGES = \ DITAA = ditaa DOT = dot -XSLTPROC = xsltproc +XSLTPROC = xsltproc --nonet all: $(ALL_IMAGES) From 32c8bec45d8dc8a5d22fc8b10d64e81afadb2f45 Mon Sep 17 00:00:00 2001 From: David Rowley Date: Mon, 13 Feb 2023 17:09:55 +1300 Subject: [PATCH 03/61] Disable WindowAgg inverse transitions when subplans are present When an aggregate function is used as a WindowFunc and a tuple transitions out of the window frame, we ordinarily try to make use of the aggregate function's inverse transition function to "unaggregate" the exiting tuple. This optimization is disabled for various cases, including when the aggregate contains a volatile function. In such a case we'd be unable to ensure that the transition value was calculated to the same value during transitions and inverse transitions. Unfortunately, we did this check by calling contain_volatile_functions() which does not recursively search SubPlans for volatile functions. If the aggregate function's arguments or its FILTER clause contained a subplan with volatile functions then we'd fail to notice this. Here we fix this by just disabling the optimization when the WindowFunc contains any subplans. Volatile functions are not the only reason that a subplan may have nonrepeatable results. Bug: #17777 Reported-by: Anban Company Discussion: https://postgr.es/m/17777-860b739b6efde977%40postgresql.org Reviewed-by: Tom Lane Backpatch-through: 11 --- src/backend/executor/nodeWindowAgg.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/backend/executor/nodeWindowAgg.c b/src/backend/executor/nodeWindowAgg.c index 611fcf94a89..f36a46cc79f 100644 --- a/src/backend/executor/nodeWindowAgg.c +++ b/src/backend/executor/nodeWindowAgg.c @@ -43,6 +43,8 @@ #include "nodes/execnodes.h" #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" +#include "optimizer/clauses.h" +#include "optimizer/optimizer.h" #include "parser/parse_agg.h" #include "parser/parse_coerce.h" #include "parser/parse_oper.h" @@ -3032,16 +3034,24 @@ initialize_peragg(WindowAggState *winstate, WindowFunc *wfunc, * aggregate's arguments (and FILTER clause if any) contain any calls to * volatile functions. Otherwise, the difference between restarting and * not restarting the aggregation would be user-visible. + * + * We also don't risk using moving aggregates when there are subplans in + * the arguments or FILTER clause. This is partly because + * contain_volatile_functions() doesn't look inside subplans; but there + * are other reasons why a subplan's output might be volatile. For + * example, syncscan mode can render the results nonrepeatable. 
*/ if (!OidIsValid(aggform->aggminvtransfn)) use_ma_code = false; /* sine qua non */ else if (aggform->aggmfinalmodify == AGGMODIFY_READ_ONLY && - aggform->aggfinalmodify != AGGMODIFY_READ_ONLY) + aggform->aggfinalmodify != AGGMODIFY_READ_ONLY) use_ma_code = true; /* decision forced by safety */ else if (winstate->frameOptions & FRAMEOPTION_START_UNBOUNDED_PRECEDING) use_ma_code = false; /* non-moving frame head */ else if (contain_volatile_functions((Node *) wfunc)) use_ma_code = false; /* avoid possible behavioral change */ + else if (contain_subplans((Node *) wfunc)) + use_ma_code = false; /* subplans might contain volatile functions */ else use_ma_code = true; /* yes, let's use it */ if (use_ma_code) From 6b126f2862e4e61911013f37e3e9f79713c2f142 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Wed, 15 Feb 2023 10:12:33 +0900 Subject: [PATCH 04/61] Fix handling of SCRAM-SHA-256's channel binding with RSA-PSS certificates OpenSSL 1.1.1 and newer versions have added support for RSA-PSS certificates, which requires the use of a specific routine in OpenSSL to determine which hash function to use when compiling it when using channel binding in SCRAM-SHA-256. X509_get_signature_nid(), that is the original routine the channel binding code has relied on, is not able to determine which hash algorithm to use for such certificates. However, X509_get_signature_info(), new to OpenSSL 1.1.1, is able to do it. This commit switches the channel binding logic to rely on X509_get_signature_info() over X509_get_signature_nid(), which would be the choice when building with 1.1.1 or newer. The error could have been triggered on the client or the server, hence libpq and the backend need to have their related code paths patched. Note that attempting to load an RSA-PSS certificate with OpenSSL 1.1.0 or older leads to a failure due to an unsupported algorithm. The discovery of relying on X509_get_signature_info() comes from Jacob, the tests have been written by Heikki (with few tweaks from me), while I have bundled the whole together while adding the bits needed for MSVC and meson. This issue exists since channel binding exists, so backpatch all the way down. Some tests are added in 15~, triggered if compiling with OpenSSL 1.1.1 or newer, where the certificate and key files can easily be generated for RSA-PSS. Reported-by: Gunnar "Nick" Bluth Author: Jacob Champion, Heikki Linnakangas Discussion: https://postgr.es/m/17760-b6c61e752ec07060@postgresql.org Backpatch-through: 11 --- configure | 12 ++++++++++++ configure.ac | 2 ++ src/backend/libpq/be-secure-openssl.c | 9 +++++++-- src/include/libpq/libpq-be.h | 2 +- src/include/pg_config.h.in | 3 +++ src/interfaces/libpq/fe-secure-openssl.c | 9 +++++++-- src/interfaces/libpq/libpq-int.h | 2 +- src/tools/msvc/Solution.pm | 10 +++++++++- 8 files changed, 42 insertions(+), 7 deletions(-) diff --git a/configure b/configure index 3228096e38e..99a9961f025 100755 --- a/configure +++ b/configure @@ -15802,6 +15802,18 @@ if test "x$ac_cv_func_CRYPTO_lock" = xyes; then : #define HAVE_CRYPTO_LOCK 1 _ACEOF +fi +done + + # Function introduced in OpenSSL 1.1.1. 
+ for ac_func in X509_get_signature_info +do : + ac_fn_c_check_func "$LINENO" "X509_get_signature_info" "ac_cv_func_X509_get_signature_info" +if test "x$ac_cv_func_X509_get_signature_info" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_X509_GET_SIGNATURE_INFO 1 +_ACEOF + fi done diff --git a/configure.ac b/configure.ac index 71c27d57691..a50ca5710c4 100644 --- a/configure.ac +++ b/configure.ac @@ -1741,6 +1741,8 @@ if test "$with_ssl" = openssl ; then # thread-safety. In 1.1.0, it's no longer required, and CRYPTO_lock() # function was removed. AC_CHECK_FUNCS([CRYPTO_lock]) + # Function introduced in OpenSSL 1.1.1. + AC_CHECK_FUNCS([X509_get_signature_info]) AC_DEFINE([USE_OPENSSL], 1, [Define to 1 to build with OpenSSL support. (--with-ssl=openssl)]) elif test "$with_ssl" != no ; then AC_MSG_ERROR([--with-ssl must specify openssl]) diff --git a/src/backend/libpq/be-secure-openssl.c b/src/backend/libpq/be-secure-openssl.c index c48812f955a..e39952494e6 100644 --- a/src/backend/libpq/be-secure-openssl.c +++ b/src/backend/libpq/be-secure-openssl.c @@ -1308,7 +1308,7 @@ be_tls_get_peer_serial(Port *port, char *ptr, size_t len) ptr[0] = '\0'; } -#ifdef HAVE_X509_GET_SIGNATURE_NID +#if defined(HAVE_X509_GET_SIGNATURE_NID) || defined(HAVE_X509_GET_SIGNATURE_INFO) char * be_tls_get_certificate_hash(Port *port, size_t *len) { @@ -1326,10 +1326,15 @@ be_tls_get_certificate_hash(Port *port, size_t *len) /* * Get the signature algorithm of the certificate to determine the hash - * algorithm to use for the result. + * algorithm to use for the result. Prefer X509_get_signature_info(), + * introduced in OpenSSL 1.1.1, which can handle RSA-PSS signatures. */ +#if HAVE_X509_GET_SIGNATURE_INFO + if (!X509_get_signature_info(server_cert, &algo_nid, NULL, NULL, NULL)) +#else if (!OBJ_find_sigid_algs(X509_get_signature_nid(server_cert), &algo_nid, NULL)) +#endif elog(ERROR, "could not determine server certificate signature algorithm"); /* diff --git a/src/include/libpq/libpq-be.h b/src/include/libpq/libpq-be.h index de4883f4f69..8d2258aea44 100644 --- a/src/include/libpq/libpq-be.h +++ b/src/include/libpq/libpq-be.h @@ -307,7 +307,7 @@ extern void be_tls_get_peer_serial(Port *port, char *ptr, size_t len); * This is not supported with old versions of OpenSSL that don't have * the X509_get_signature_nid() function. */ -#if defined(USE_OPENSSL) && defined(HAVE_X509_GET_SIGNATURE_NID) +#if defined(USE_OPENSSL) && (defined(HAVE_X509_GET_SIGNATURE_NID) || defined(HAVE_X509_GET_SIGNATURE_INFO)) #define HAVE_BE_TLS_GET_CERTIFICATE_HASH extern char *be_tls_get_certificate_hash(Port *port, size_t *len); #endif diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in index d02229909e9..2a06438113b 100644 --- a/src/include/pg_config.h.in +++ b/src/include/pg_config.h.in @@ -807,6 +807,9 @@ /* Define to 1 if you have the `writev' function. */ #undef HAVE_WRITEV +/* Define to 1 if you have the `X509_get_signature_info' function. */ +#undef HAVE_X509_GET_SIGNATURE_INFO + /* Define to 1 if you have the `X509_get_signature_nid' function. 
*/ #undef HAVE_X509_GET_SIGNATURE_NID diff --git a/src/interfaces/libpq/fe-secure-openssl.c b/src/interfaces/libpq/fe-secure-openssl.c index c0988e10a30..d75a823b880 100644 --- a/src/interfaces/libpq/fe-secure-openssl.c +++ b/src/interfaces/libpq/fe-secure-openssl.c @@ -378,7 +378,7 @@ pgtls_write(PGconn *conn, const void *ptr, size_t len) return n; } -#ifdef HAVE_X509_GET_SIGNATURE_NID +#if defined(HAVE_X509_GET_SIGNATURE_NID) || defined(HAVE_X509_GET_SIGNATURE_INFO) char * pgtls_get_peer_certificate_hash(PGconn *conn, size_t *len) { @@ -398,10 +398,15 @@ pgtls_get_peer_certificate_hash(PGconn *conn, size_t *len) /* * Get the signature algorithm of the certificate to determine the hash - * algorithm to use for the result. + * algorithm to use for the result. Prefer X509_get_signature_info(), + * introduced in OpenSSL 1.1.1, which can handle RSA-PSS signatures. */ +#if HAVE_X509_GET_SIGNATURE_INFO + if (!X509_get_signature_info(peer_cert, &algo_nid, NULL, NULL, NULL)) +#else if (!OBJ_find_sigid_algs(X509_get_signature_nid(peer_cert), &algo_nid, NULL)) +#endif { appendPQExpBufferStr(&conn->errorMessage, libpq_gettext("could not determine server certificate signature algorithm\n")); diff --git a/src/interfaces/libpq/libpq-int.h b/src/interfaces/libpq/libpq-int.h index 0cbd611bd98..27321d262cf 100644 --- a/src/interfaces/libpq/libpq-int.h +++ b/src/interfaces/libpq/libpq-int.h @@ -869,7 +869,7 @@ extern ssize_t pgtls_write(PGconn *conn, const void *ptr, size_t len); * This is not supported with old versions of OpenSSL that don't have * the X509_get_signature_nid() function. */ -#if defined(USE_OPENSSL) && defined(HAVE_X509_GET_SIGNATURE_NID) +#if defined(USE_OPENSSL) && (defined(HAVE_X509_GET_SIGNATURE_NID) || defined(HAVE_X509_GET_SIGNATURE_INFO)) #define HAVE_PGTLS_GET_PEER_CERTIFICATE_HASH extern char *pgtls_get_peer_certificate_hash(PGconn *conn, size_t *len); #endif diff --git a/src/tools/msvc/Solution.pm b/src/tools/msvc/Solution.pm index b6393b41773..2b9a77878a4 100644 --- a/src/tools/msvc/Solution.pm +++ b/src/tools/msvc/Solution.pm @@ -433,6 +433,7 @@ sub GenerateFiles HAVE_WCTYPE_H => 1, HAVE_WRITEV => undef, HAVE_X509_GET_SIGNATURE_NID => 1, + HAVE_X509_GET_SIGNATURE_INFO => undef, HAVE_X86_64_POPCNTQ => undef, HAVE__BOOL => undef, HAVE__BUILTIN_BSWAP16 => undef, @@ -548,7 +549,14 @@ sub GenerateFiles my ($digit1, $digit2, $digit3) = $self->GetOpenSSLVersion(); - # More symbols are needed with OpenSSL 1.1.0 and above. + # Symbols needed with OpenSSL 1.1.1 and above. + if ( ($digit1 >= '3' && $digit2 >= '0' && $digit3 >= '0') + || ($digit1 >= '1' && $digit2 >= '1' && $digit3 >= '1')) + { + $define{HAVE_X509_GET_SIGNATURE_INFO} = 1; + } + + # Symbols needed with OpenSSL 1.1.0 and above. if ( ($digit1 >= '3' && $digit2 >= '0' && $digit3 >= '0') || ($digit1 >= '1' && $digit2 >= '1' && $digit3 >= '0')) { From 1cdbdfce2d0005e89dd75921726130ee4a00e33d Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 17 Feb 2023 16:40:34 -0500 Subject: [PATCH 05/61] Print the correct aliases for DML target tables in ruleutils. ruleutils.c blindly printed the user-given alias (or nothing if there hadn't been one) for the target table of INSERT/UPDATE/DELETE queries. That works a large percentage of the time, but not always: for queries appearing in WITH, it's possible that we chose a different alias to avoid conflict with outer-scope names. 
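For example (distilled from the new regression test below), given a rule
action like

    with wins as (insert into int4_tbl as trgt values (0) returning *)
    insert into rules_log as trgt select old.* from wins
    returning trgt.f1, trgt.f2;

set_rtable_names has to pick a different alias, such as "trgt_1", for
the inner INSERT's target when deparsing.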
Since the chosen alias would be used in any Var references to the target table, this'd lead to an inconsistent printout with consequences such as dump/restore failures. The correct logic for printing (or not) a relation alias was embedded in get_from_clause_item. Factor it out to a separate function so that we don't need a jointree node to use it. (Only a limited part of that function can be reached from these new call sites, but this seems like the cleanest non-duplicative factorization.) In passing, I got rid of a redundant "\d+ rules_src" step in rules.sql. Initial report from Jonathan Katz; thanks to Vignesh C for analysis. This has been broken for a long time, so back-patch to all supported branches. Discussion: https://postgr.es/m/e947fa21-24b2-f922-375a-d4f763ef3e4b@postgresql.org Discussion: https://postgr.es/m/CALDaNm1MMntjmT_NJGp-Z=xbF02qHGAyuSHfYHias3TqQbPF2w@mail.gmail.com --- src/backend/utils/adt/ruleutils.c | 145 ++++++++++++++++------------ src/test/regress/expected/rules.out | 47 +++++---- src/test/regress/sql/rules.sql | 13 ++- 3 files changed, 126 insertions(+), 79 deletions(-) diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index bf663afb3c5..dbbb2a70a07 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -481,6 +481,8 @@ static void get_from_clause(Query *query, const char *prefix, deparse_context *context); static void get_from_clause_item(Node *jtnode, Query *query, deparse_context *context); +static void get_rte_alias(RangeTblEntry *rte, int varno, bool use_as, + deparse_context *context); static void get_column_alias_list(deparse_columns *colinfo, deparse_context *context); static void get_from_clause_coldeflist(RangeTblFunction *rtfunc, @@ -6661,12 +6663,14 @@ get_insert_query_def(Query *query, deparse_context *context, context->indentLevel += PRETTYINDENT_STD; appendStringInfoChar(buf, ' '); } - appendStringInfo(buf, "INSERT INTO %s ", + appendStringInfo(buf, "INSERT INTO %s", generate_relation_name(rte->relid, NIL)); - /* INSERT requires AS keyword for target alias */ - if (rte->alias != NULL) - appendStringInfo(buf, "AS %s ", - quote_identifier(rte->alias->aliasname)); + + /* Print the relation alias, if needed; INSERT requires explicit AS */ + get_rte_alias(rte, query->resultRelation, true, context); + + /* always want a space here */ + appendStringInfoChar(buf, ' '); /* * Add the insert-column-names list. 
Any indirection decoration needed on @@ -6848,9 +6852,10 @@ get_update_query_def(Query *query, deparse_context *context, appendStringInfo(buf, "UPDATE %s%s", only_marker(rte), generate_relation_name(rte->relid, NIL)); - if (rte->alias != NULL) - appendStringInfo(buf, " %s", - quote_identifier(rte->alias->aliasname)); + + /* Print the relation alias, if needed */ + get_rte_alias(rte, query->resultRelation, false, context); + appendStringInfoString(buf, " SET "); /* Deparse targetlist */ @@ -7056,9 +7061,9 @@ get_delete_query_def(Query *query, deparse_context *context, appendStringInfo(buf, "DELETE FROM %s%s", only_marker(rte), generate_relation_name(rte->relid, NIL)); - if (rte->alias != NULL) - appendStringInfo(buf, " %s", - quote_identifier(rte->alias->aliasname)); + + /* Print the relation alias, if needed */ + get_rte_alias(rte, query->resultRelation, false, context); /* Add the USING clause if given */ get_from_clause(query, " USING ", context); @@ -11231,10 +11236,8 @@ get_from_clause_item(Node *jtnode, Query *query, deparse_context *context) { int varno = ((RangeTblRef *) jtnode)->rtindex; RangeTblEntry *rte = rt_fetch(varno, query->rtable); - char *refname = get_rtable_name(varno, context); deparse_columns *colinfo = deparse_columns_fetch(varno, dpns); RangeTblFunction *rtfunc1 = NULL; - bool printalias; if (rte->lateral) appendStringInfoString(buf, "LATERAL "); @@ -11382,54 +11385,7 @@ get_from_clause_item(Node *jtnode, Query *query, deparse_context *context) } /* Print the relation alias, if needed */ - printalias = false; - if (rte->alias != NULL) - { - /* Always print alias if user provided one */ - printalias = true; - } - else if (colinfo->printaliases) - { - /* Always print alias if we need to print column aliases */ - printalias = true; - } - else if (rte->rtekind == RTE_RELATION) - { - /* - * No need to print alias if it's same as relation name (this - * would normally be the case, but not if set_rtable_names had to - * resolve a conflict). - */ - if (strcmp(refname, get_relation_name(rte->relid)) != 0) - printalias = true; - } - else if (rte->rtekind == RTE_FUNCTION || rte->rtekind == RTE_TABLEFUNCTION) - { - /* - * For a function RTE, always print alias. This covers possible - * renaming of the function and/or instability of the - * FigureColname rules for things that aren't simple functions. - * Note we'd need to force it anyway for the columndef list case. - */ - printalias = true; - } - else if (rte->rtekind == RTE_VALUES) - { - /* Alias is syntactically required for VALUES */ - printalias = true; - } - else if (rte->rtekind == RTE_CTE) - { - /* - * No need to print alias if it's same as CTE name (this would - * normally be the case, but not if set_rtable_names had to - * resolve a conflict). - */ - if (strcmp(refname, rte->ctename) != 0) - printalias = true; - } - if (printalias) - appendStringInfo(buf, " %s", quote_identifier(refname)); + get_rte_alias(rte, varno, false, context); /* Print the column definitions or aliases, if needed */ if (rtfunc1 && rtfunc1->funccolnames != NIL) @@ -11567,6 +11523,73 @@ get_from_clause_item(Node *jtnode, Query *query, deparse_context *context) (int) nodeTag(jtnode)); } +/* + * get_rte_alias - print the relation's alias, if needed + * + * If printed, the alias is preceded by a space, or by " AS " if use_as is true. 
+ */ +static void +get_rte_alias(RangeTblEntry *rte, int varno, bool use_as, + deparse_context *context) +{ + deparse_namespace *dpns = (deparse_namespace *) linitial(context->namespaces); + char *refname = get_rtable_name(varno, context); + deparse_columns *colinfo = deparse_columns_fetch(varno, dpns); + bool printalias = false; + + if (rte->alias != NULL) + { + /* Always print alias if user provided one */ + printalias = true; + } + else if (colinfo->printaliases) + { + /* Always print alias if we need to print column aliases */ + printalias = true; + } + else if (rte->rtekind == RTE_RELATION) + { + /* + * No need to print alias if it's same as relation name (this would + * normally be the case, but not if set_rtable_names had to resolve a + * conflict). + */ + if (strcmp(refname, get_relation_name(rte->relid)) != 0) + printalias = true; + } + else if (rte->rtekind == RTE_FUNCTION) + { + /* + * For a function RTE, always print alias. This covers possible + * renaming of the function and/or instability of the FigureColname + * rules for things that aren't simple functions. Note we'd need to + * force it anyway for the columndef list case. + */ + printalias = true; + } + else if (rte->rtekind == RTE_SUBQUERY || + rte->rtekind == RTE_VALUES) + { + /* Alias is syntactically required for SUBQUERY and VALUES */ + printalias = true; + } + else if (rte->rtekind == RTE_CTE) + { + /* + * No need to print alias if it's same as CTE name (this would + * normally be the case, but not if set_rtable_names had to resolve a + * conflict). + */ + if (strcmp(refname, rte->ctename) != 0) + printalias = true; + } + + if (printalias) + appendStringInfo(context->buf, "%s%s", + use_as ? " AS " : " ", + quote_identifier(refname)); +} + /* * get_column_alias_list - print column alias list for an RTE * diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index a2922a0a9ec..a51729af420 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -3171,28 +3171,21 @@ select * from rules_log; (16 rows) create rule r4 as on delete to rules_src do notify rules_src_deletion; -\d+ rules_src - Table "public.rules_src" - Column | Type | Collation | Nullable | Default | Storage | Stats target | Description ---------+---------+-----------+----------+---------+---------+--------------+------------- - f1 | integer | | | | plain | | - f2 | integer | | | 0 | plain | | -Rules: - r1 AS - ON UPDATE TO rules_src DO INSERT INTO rules_log (f1, f2, tag, id) VALUES (old.f1,old.f2,'old'::text,DEFAULT), (new.f1,new.f2,'new'::text,DEFAULT) - r2 AS - ON UPDATE TO rules_src DO VALUES (old.f1,old.f2,'old'::text), (new.f1,new.f2,'new'::text) - r3 AS - ON INSERT TO rules_src DO INSERT INTO rules_log (f1, f2, tag, id) VALUES (NULL::integer,NULL::integer,'-'::text,DEFAULT), (new.f1,new.f2,'new'::text,DEFAULT) - r4 AS - ON DELETE TO rules_src DO - NOTIFY rules_src_deletion - -- -- Ensure an aliased target relation for insert is correctly deparsed. -- create rule r5 as on insert to rules_src do instead insert into rules_log AS trgt SELECT NEW.* RETURNING trgt.f1, trgt.f2; create rule r6 as on update to rules_src do instead UPDATE rules_log AS trgt SET tag = 'updated' WHERE trgt.f1 = new.f1; +-- +-- Check deparse disambiguation of INSERT/UPDATE/DELETE targets. 
+-- +create rule r7 as on delete to rules_src do instead + with wins as (insert into int4_tbl as trgt values (0) returning *), + wupd as (update int4_tbl trgt set f1 = f1+1 returning *), + wdel as (delete from int4_tbl trgt where f1 = 0 returning *) + insert into rules_log AS trgt select old.* from wins, wupd, wdel + returning trgt.f1, trgt.f2; +-- check display of all rules added above \d+ rules_src Table "public.rules_src" Column | Type | Collation | Nullable | Default | Storage | Stats target | Description @@ -3217,6 +3210,26 @@ Rules: r6 AS ON UPDATE TO rules_src DO INSTEAD UPDATE rules_log trgt SET tag = 'updated'::text WHERE trgt.f1 = new.f1 + r7 AS + ON DELETE TO rules_src DO INSTEAD WITH wins AS ( + INSERT INTO int4_tbl AS trgt_1 (f1) + VALUES (0) + RETURNING trgt_1.f1 + ), wupd AS ( + UPDATE int4_tbl trgt_1 SET f1 = trgt_1.f1 + 1 + RETURNING trgt_1.f1 + ), wdel AS ( + DELETE FROM int4_tbl trgt_1 + WHERE trgt_1.f1 = 0 + RETURNING trgt_1.f1 + ) + INSERT INTO rules_log AS trgt (f1, f2) SELECT old.f1, + old.f2 + FROM wins, + wupd, + wdel + RETURNING trgt.f1, + trgt.f2 -- -- Also check multiassignment deparsing. diff --git a/src/test/regress/sql/rules.sql b/src/test/regress/sql/rules.sql index 7b0cd28720c..1318482d747 100644 --- a/src/test/regress/sql/rules.sql +++ b/src/test/regress/sql/rules.sql @@ -1034,13 +1034,24 @@ insert into rules_src values(22,23), (33,default); select * from rules_src; select * from rules_log; create rule r4 as on delete to rules_src do notify rules_src_deletion; -\d+ rules_src -- -- Ensure an aliased target relation for insert is correctly deparsed. -- create rule r5 as on insert to rules_src do instead insert into rules_log AS trgt SELECT NEW.* RETURNING trgt.f1, trgt.f2; create rule r6 as on update to rules_src do instead UPDATE rules_log AS trgt SET tag = 'updated' WHERE trgt.f1 = new.f1; + +-- +-- Check deparse disambiguation of INSERT/UPDATE/DELETE targets. +-- +create rule r7 as on delete to rules_src do instead + with wins as (insert into int4_tbl as trgt values (0) returning *), + wupd as (update int4_tbl trgt set f1 = f1+1 returning *), + wdel as (delete from int4_tbl trgt where f1 = 0 returning *) + insert into rules_log AS trgt select old.* from wins, wupd, wdel + returning trgt.f1, trgt.f2; + +-- check display of all rules added above \d+ rules_src -- From 1bf595604aff657eb621c316e486b456fe3ce484 Mon Sep 17 00:00:00 2001 From: Tomas Vondra Date: Sun, 19 Feb 2023 00:41:18 +0100 Subject: [PATCH 06/61] Fix handling of multi-column BRIN indexes When evaluating clauses on multiple scan keys of a multi-column BRIN index, we can stop processing as soon as we find a scan key eliminating the range, and the range should not be added to tbe bitmap. That's how it worked before 14, but since a681e3c107a the code treated the range as matching if it matched at least the last scan key. Backpatch to 14, where this code was introduced. Backpatch-through: 14 Discussion: https://postgr.es/m/ebc18613-125e-60df-7520-fcbe0f9274fc%40enterprisedb.com --- src/backend/access/brin/brin.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c index a64d70bafc7..a6549d9dcbf 100644 --- a/src/backend/access/brin/brin.c +++ b/src/backend/access/brin/brin.c @@ -777,6 +777,13 @@ bringetbitmap(IndexScanDesc scan, Node **bmNodeP) break; } } + + /* + * If we found a scan key eliminating the range, no need to + * check additional ones. 
+ */ + if (!addrange) + break; } } } From 19c4d0acbf5ecca7320f7e1fda3d612eb6e1dd30 Mon Sep 17 00:00:00 2001 From: Alvaro Herrera Date: Tue, 21 Feb 2023 10:56:37 +0100 Subject: [PATCH 07/61] pgbench: Prepare commands in pipelines in advance Failing to do so results in an error when a pgbench script tries to start a serializable transaction inside a pipeline, because by the time BEGIN ISOLATION LEVEL SERIALIZABLE is executed, we're already in a transaction that has acquired a snapshot, so the server rightfully complains. We can work around that by preparing all commands in the pipeline before actually starting the pipeline. This changes the existing code in two aspects: first, we now prepare each command individually at the point where that command is about to be executed; previously, we would prepare all commands in a script as soon as the first command of that script would be executed. It's hard to see that this would make much of a difference (particularly since it only affects the first time to execute each script in a client), but I didn't actually try to measure it. Secondly, we no longer use PQsendPrepare() in pipeline mode, but only PQprepare. There's no specific reason for this change other than no longer needing to do differently in pipeline mode. (Previously we had no choice, because in pipeline mode PQprepare could not be used.) Backpatch to 14, where pgbench got support for pipeline mode. Reported-by: Yugo NAGATA Discussion: https://postgr.es/m/20210716153013.fc53b1c780b06fccc07a7f0d@sraoss.co.jp --- src/bin/pgbench/pgbench.c | 162 +++++++++++++------ src/bin/pgbench/t/001_pgbench_with_server.pl | 20 +++ 2 files changed, 130 insertions(+), 52 deletions(-) diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c index e0dfadcf414..c87bc3dd007 100644 --- a/src/bin/pgbench/pgbench.c +++ b/src/bin/pgbench/pgbench.c @@ -481,7 +481,8 @@ typedef struct pg_time_usec_t txn_begin; /* used for measuring schedule lag times */ pg_time_usec_t stmt_begin; /* used for measuring statement latencies */ - bool prepared[MAX_SCRIPTS]; /* whether client prepared the script */ + /* whether client prepared each command of each script */ + bool **prepared; /* per client collected stats */ int64 cnt; /* client transaction count, for -t */ @@ -573,7 +574,8 @@ static const char *QUERYMODE[] = {"simple", "extended", "prepared"}; * argv Command arguments, the first of which is the command or SQL * string itself. For SQL commands, after post-processing * argv[0] is the same as 'lines' with variables substituted. - * varprefix SQL commands terminated with \gset or \aset have this set + * prepname The name that this command is prepared under, in prepare mode + * varprefix SQL commands terminated with \gset or \aset have this set * to a non NULL value. If nonempty, it's used to prefix the * variable name that receives the value. * aset do gset on all possible queries of a combined query (\;). @@ -588,6 +590,7 @@ typedef struct Command MetaCommand meta; int argc; char *argv[MAX_ARGS]; + char *prepname; char *varprefix; PgBenchExpr *expr; SimpleStats stats; @@ -2836,13 +2839,9 @@ runShellCommand(CState *st, char *variable, char **argv, int argc) return true; } -#define MAX_PREPARE_NAME 32 -static void -preparedStatementName(char *buffer, int file, int state) -{ - sprintf(buffer, "P%d_%d", file, state); -} - +/* + * Report the abortion of the client when processing SQL commands. 
+ */ static void commandFailed(CState *st, const char *cmd, const char *message) { @@ -2869,6 +2868,87 @@ chooseScript(TState *thread) return i - 1; } +/* + * Prepare the SQL command from st->use_file at command_num. + */ +static void +prepareCommand(CState *st, int command_num) +{ + Command *command = sql_script[st->use_file].commands[command_num]; + + /* No prepare for non-SQL commands */ + if (command->type != SQL_COMMAND) + return; + + /* + * If not already done, allocate space for 'prepared' flags: one boolean + * for each command of each script. + */ + if (!st->prepared) + { + st->prepared = pg_malloc(sizeof(bool *) * num_scripts); + for (int i = 0; i < num_scripts; i++) + { + ParsedScript *script = &sql_script[i]; + int numcmds; + + for (numcmds = 0; script->commands[numcmds] != NULL; numcmds++) + ; + st->prepared[i] = pg_malloc0(sizeof(bool) * numcmds); + } + } + + if (!st->prepared[st->use_file][command_num]) + { + PGresult *res; + + pg_log_debug("client %d preparing %s", st->id, command->prepname); + res = PQprepare(st->con, command->prepname, + command->argv[0], command->argc - 1, NULL); + if (PQresultStatus(res) != PGRES_COMMAND_OK) + pg_log_error("%s", PQerrorMessage(st->con)); + PQclear(res); + st->prepared[st->use_file][command_num] = true; + } +} + +/* + * Prepare all the commands in the script that come after the \startpipeline + * that's at position st->command, and the first \endpipeline we find. + * + * This sets the ->prepared flag for each relevant command as well as the + * \startpipeline itself, but doesn't move the st->command counter. + */ +static void +prepareCommandsInPipeline(CState *st) +{ + int j; + Command **commands = sql_script[st->use_file].commands; + + Assert(commands[st->command]->type == META_COMMAND && + commands[st->command]->meta == META_STARTPIPELINE); + + /* + * We set the 'prepared' flag on the \startpipeline itself to flag that we + * don't need to do this next time without calling prepareCommand(), even + * though we don't actually prepare this command. + */ + if (st->prepared && + st->prepared[st->use_file][st->command]) + return; + + for (j = st->command + 1; commands[j] != NULL; j++) + { + if (commands[j]->type == META_COMMAND && + commands[j]->meta == META_ENDPIPELINE) + break; + + prepareCommand(st, j); + } + + st->prepared[st->use_file][st->command] = true; +} + /* Send a SQL command, using the chosen querymode */ static bool sendCommand(CState *st, Command *command) @@ -2899,50 +2979,13 @@ sendCommand(CState *st, Command *command) } else if (querymode == QUERY_PREPARED) { - char name[MAX_PREPARE_NAME]; const char *params[MAX_ARGS]; - if (!st->prepared[st->use_file]) - { - int j; - Command **commands = sql_script[st->use_file].commands; - - for (j = 0; commands[j] != NULL; j++) - { - PGresult *res; - char name[MAX_PREPARE_NAME]; - - if (commands[j]->type != SQL_COMMAND) - continue; - preparedStatementName(name, st->use_file, j); - if (PQpipelineStatus(st->con) == PQ_PIPELINE_OFF) - { - res = PQprepare(st->con, name, - commands[j]->argv[0], commands[j]->argc - 1, NULL); - if (PQresultStatus(res) != PGRES_COMMAND_OK) - pg_log_error("%s", PQerrorMessage(st->con)); - PQclear(res); - } - else - { - /* - * In pipeline mode, we use asynchronous functions. If a - * server-side error occurs, it will be processed later - * among the other results. 
- */ - if (!PQsendPrepare(st->con, name, - commands[j]->argv[0], commands[j]->argc - 1, NULL)) - pg_log_error("%s", PQerrorMessage(st->con)); - } - } - st->prepared[st->use_file] = true; - } - + prepareCommand(st, st->command); getQueryParams(st, command, params); - preparedStatementName(name, st->use_file, st->command); - pg_log_debug("client %d sending %s", st->id, name); - r = PQsendQueryPrepared(st->con, name, command->argc - 1, + pg_log_debug("client %d sending %s", st->id, command->prepname); + r = PQsendQueryPrepared(st->con, command->prepname, command->argc - 1, params, NULL, NULL, 0); } else /* unknown sql mode */ @@ -3202,7 +3245,8 @@ advanceConnectionState(TState *thread, CState *st, StatsData *agg) thread->conn_duration += now - start; /* Reset session-local state */ - memset(st->prepared, 0, sizeof(st->prepared)); + pg_free(st->prepared); + st->prepared = NULL; } /* record transaction start time */ @@ -3777,6 +3821,16 @@ executeMetaCommand(CState *st, pg_time_usec_t *now) return CSTATE_ABORTED; } + /* + * If we're in prepared-query mode, we need to prepare all the + * commands that are inside the pipeline before we actually start the + * pipeline itself. This solves the problem that running BEGIN + * ISOLATION LEVEL SERIALIZABLE in a pipeline would fail due to a + * snapshot having been acquired by the prepare within the pipeline. + */ + if (querymode == QUERY_PREPARED) + prepareCommandsInPipeline(st); + if (PQpipelineStatus(st->con) != PQ_PIPELINE_OFF) { commandFailed(st, "startpipeline", "already in pipeline mode"); @@ -4818,6 +4872,7 @@ create_sql_command(PQExpBuffer buf, const char *source) my_command->varprefix = NULL; /* allocated later, if needed */ my_command->expr = NULL; initSimpleStats(&my_command->stats); + my_command->prepname = NULL; /* set later, if needed */ return my_command; } @@ -4849,6 +4904,7 @@ static void postprocess_sql_command(Command *my_command) { char buffer[128]; + static int prepnum = 0; Assert(my_command->type == SQL_COMMAND); @@ -4857,15 +4913,17 @@ postprocess_sql_command(Command *my_command) buffer[strcspn(buffer, "\n\r")] = '\0'; my_command->first_line = pg_strdup(buffer); - /* parse query if necessary */ + /* Parse query and generate prepared statement name, if necessary */ switch (querymode) { case QUERY_SIMPLE: my_command->argv[0] = my_command->lines.data; my_command->argc++; break; - case QUERY_EXTENDED: case QUERY_PREPARED: + my_command->prepname = psprintf("P_%d", prepnum++); + /* fall through */ + case QUERY_EXTENDED: if (!parseQuery(my_command)) exit(1); break; diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl index 282ccc24aeb..76ecd9efeba 100644 --- a/src/bin/pgbench/t/001_pgbench_with_server.pl +++ b/src/bin/pgbench/t/001_pgbench_with_server.pl @@ -841,6 +841,26 @@ } }); +# Working \startpipeline in prepared query mode with serializable +$node->pgbench( + '-c4 -j2 -t 10 -n -M prepared', + 0, + [ + qr{type: .*/001_pgbench_pipeline_serializable}, + qr{actually processed: (\d+)/\1} + ], + [], + 'working \startpipeline with serializable', + { + '001_pgbench_pipeline_serializable' => q{ +-- test startpipeline with serializable +\startpipeline +BEGIN ISOLATION LEVEL SERIALIZABLE; +} . "select 1;\n" x 10 . q{ +END; +\endpipeline +} + }); # trigger many expression errors my @errors = ( From 8f706943e5c20528ab783cc22989b3e4f8499326 Mon Sep 17 00:00:00 2001 From: Dean Rasheed Date: Wed, 22 Feb 2023 13:26:20 +0000 Subject: [PATCH 08/61] Add missing support for the latest SPI status codes. 
SPI_result_code_string() was missing support for SPI_OK_TD_REGISTER, and in v15 and later, it was missing support for SPI_OK_MERGE, as was pltcl_process_SPI_result(). The last of those would trigger an error if a MERGE was executed from PL/Tcl. The others seem fairly innocuous, but worth fixing. Back-patch to all supported branches. Before v15, this is just adding SPI_OK_TD_REGISTER to SPI_result_code_string(), which is unlikely to be seen by anyone, but seems worth doing for completeness. Reviewed by Tom Lane. Discussion: https://postgr.es/m/CAEZATCUg8V%2BK%2BGcafOPqymxk84Y_prXgfe64PDoopjLFH6Z0Aw%40mail.gmail.com https://postgr.es/m/CAEZATCUMe%2B_KedPMM9AxKqm%3DSZogSxjUcrMe%2BsakusZh3BFcQw%40mail.gmail.com --- src/backend/executor/spi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/backend/executor/spi.c b/src/backend/executor/spi.c index 4a2ddd5dff3..8f04c342d5a 100644 --- a/src/backend/executor/spi.c +++ b/src/backend/executor/spi.c @@ -2084,6 +2084,8 @@ SPI_result_code_string(int code) return "SPI_OK_REL_REGISTER"; case SPI_OK_REL_UNREGISTER: return "SPI_OK_REL_UNREGISTER"; + case SPI_OK_TD_REGISTER: + return "SPI_OK_TD_REGISTER"; } /* Unrecognized code ... return something useful ... */ sprintf(buf, "Unrecognized SPI code %d", code); From 52fd7adf33e231858b95231169fda6b956e661ff Mon Sep 17 00:00:00 2001 From: Tomas Vondra Date: Wed, 22 Feb 2023 15:24:09 +0100 Subject: [PATCH 09/61] Fix snapshot handling in logicalmsg_decode Whe decoding a transactional logical message, logicalmsg_decode called SnapBuildGetOrBuildSnapshot. But we may not have a consistent snapshot yet at that point. We don't actually need the snapshot in this case (during replay we'll have the snapshot from the transaction), so in practice this is harmless. But in assert-enabled build this crashes. Fixed by requesting the snapshot only in non-transactional case, where we are guaranteed to have SNAPBUILD_CONSISTENT. Backpatch to 11. The issue exists since 9.6. Backpatch-through: 11 Reviewed-by: Andres Freund Discussion: https://postgr.es/m/84d60912-6eab-9b84-5de3-41765a5449e8@enterprisedb.com --- src/backend/replication/logical/decode.c | 14 ++++++++++++-- src/backend/replication/logical/reorderbuffer.c | 10 ++++++++++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/src/backend/replication/logical/decode.c b/src/backend/replication/logical/decode.c index 7a6323c3989..755d7ae6d2d 100644 --- a/src/backend/replication/logical/decode.c +++ b/src/backend/replication/logical/decode.c @@ -572,7 +572,7 @@ logicalmsg_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) TransactionId xid = XLogRecGetXid(r); uint8 info = XLogRecGetInfo(r) & ~XLR_INFO_MASK; RepOriginId origin_id = XLogRecGetOrigin(r); - Snapshot snapshot; + Snapshot snapshot = NULL; xl_logical_message *message; if (info != XLOG_LOGICAL_MESSAGE) @@ -602,7 +602,17 @@ logicalmsg_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) SnapBuildXactNeedsSkip(builder, buf->origptr))) return; - snapshot = SnapBuildGetOrBuildSnapshot(builder, xid); + /* + * If this is a non-transactional change, get the snapshot we're expected + * to use. We only get here when the snapshot is consistent, and the + * change is not meant to be skipped. + * + * For transactional changes we don't need a snapshot, we'll use the + * regular snapshot maintained by ReorderBuffer. We just leave it NULL. 
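+	 * (ReorderBufferQueueMessage now asserts that it stays NULL.)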
+ */ + if (!message->transactional) + snapshot = SnapBuildGetOrBuildSnapshot(builder, xid); + ReorderBufferQueueMessage(ctx->reorder, xid, snapshot, buf->endptr, message->transactional, message->message, /* first part of message is diff --git a/src/backend/replication/logical/reorderbuffer.c b/src/backend/replication/logical/reorderbuffer.c index 709365fc8c6..721fa652d25 100644 --- a/src/backend/replication/logical/reorderbuffer.c +++ b/src/backend/replication/logical/reorderbuffer.c @@ -821,6 +821,13 @@ ReorderBufferQueueMessage(ReorderBuffer *rb, TransactionId xid, Assert(xid != InvalidTransactionId); + /* + * We don't expect snapshots for transactional changes - we'll use the + * snapshot derived later during apply (unless the change gets + * skipped). + */ + Assert(!snapshot); + oldcontext = MemoryContextSwitchTo(rb->context); change = ReorderBufferGetChange(rb); @@ -839,6 +846,9 @@ ReorderBufferQueueMessage(ReorderBuffer *rb, TransactionId xid, ReorderBufferTXN *txn = NULL; volatile Snapshot snapshot_now = snapshot; + /* Non-transactional changes require a valid snapshot. */ + Assert(snapshot_now); + if (xid != InvalidTransactionId) txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true); From 9d08e8d5474497b4f3b23fd5b9e8f105acfe41d1 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Thu, 23 Feb 2023 15:40:28 -0500 Subject: [PATCH 10/61] Don't repeatedly register cache callbacks in pgoutput plugin. Multiple cycles of starting up and shutting down the plugin within a single session would eventually lead to "out of relcache_callback_list slots", because pgoutput_startup blindly re-registered its cache callbacks each time. Fix it to register them only once, as all other users of cache callbacks already take care to do. This has been broken all along, so back-patch to all supported branches. Shi Yu Discussion: https://postgr.es/m/OSZPR01MB631004A78D743D68921FFAD3FDA79@OSZPR01MB6310.jpnprd01.prod.outlook.com --- src/backend/replication/pgoutput/pgoutput.c | 24 ++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/src/backend/replication/pgoutput/pgoutput.c b/src/backend/replication/pgoutput/pgoutput.c index ff9cf5d406d..df2ea94d468 100644 --- a/src/backend/replication/pgoutput/pgoutput.c +++ b/src/backend/replication/pgoutput/pgoutput.c @@ -260,6 +260,7 @@ pgoutput_startup(LogicalDecodingContext *ctx, OutputPluginOptions *opt, bool is_init) { PGOutputData *data = palloc0(sizeof(PGOutputData)); + static bool publication_callback_registered = false; /* Create our memory context for private allocations. */ data->context = AllocSetContextCreate(ctx->context, @@ -323,9 +324,18 @@ pgoutput_startup(LogicalDecodingContext *ctx, OutputPluginOptions *opt, /* Init publication state. */ data->publications = NIL; publications_valid = false; - CacheRegisterSyscacheCallback(PUBLICATIONOID, - publication_invalidation_cb, - (Datum) 0); + + /* + * Register callback for pg_publication if we didn't already do that + * during some previous call in this process. + */ + if (!publication_callback_registered) + { + CacheRegisterSyscacheCallback(PUBLICATIONOID, + publication_invalidation_cb, + (Datum) 0); + publication_callback_registered = true; + } /* Initialize relation schema cache. 
*/ init_rel_sync_cache(CacheMemoryContext); @@ -948,7 +958,9 @@ static void init_rel_sync_cache(MemoryContext cachectx) { HASHCTL ctl; + static bool relation_callbacks_registered = false; + /* Nothing to do if hash table already exists */ if (RelationSyncCache != NULL) return; @@ -963,10 +975,16 @@ init_rel_sync_cache(MemoryContext cachectx) Assert(RelationSyncCache != NULL); + /* No more to do if we already registered callbacks */ + if (relation_callbacks_registered) + return; + CacheRegisterRelcacheCallback(rel_sync_cache_relation_cb, (Datum) 0); CacheRegisterSyscacheCallback(PUBLICATIONRELMAP, rel_sync_cache_publication_cb, (Datum) 0); + + relation_callbacks_registered = true; } /* From dfec68fb87e1d1ace84005bbe51930afa3071989 Mon Sep 17 00:00:00 2001 From: Dean Rasheed Date: Sat, 25 Feb 2023 14:44:49 +0000 Subject: [PATCH 11/61] Fix mishandling of OLD/NEW references in subqueries in rule actions. If a rule action contains a subquery that refers to columns from OLD or NEW, then those are really lateral references, and the planner will complain if it sees such things in a subquery that isn't marked as lateral. However, at rule-definition time, the user isn't required to mark the subquery with LATERAL, and so it can fail when the rule is used. Fix this by marking such subqueries as lateral in the rewriter, at the point where they're used. Dean Rasheed and Tom Lane, per report from Alexander Lakhin. Back-patch to all supported branches. Discussion: https://postgr.es/m/5e09da43-aaba-7ea7-0a51-a2eb981b058b%40gmail.com --- src/backend/rewrite/rewriteHandler.c | 22 ++++++++++++++++++---- src/test/regress/expected/rules.out | 25 +++++++++++++++++++++++++ src/test/regress/sql/rules.sql | 17 +++++++++++++++++ 3 files changed, 60 insertions(+), 4 deletions(-) diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c index 2938edf1c6a..5fe8136ab9a 100644 --- a/src/backend/rewrite/rewriteHandler.c +++ b/src/backend/rewrite/rewriteHandler.c @@ -403,6 +403,7 @@ rewriteRuleAction(Query *parsetree, Query *sub_action; Query **sub_action_ptr; acquireLocksOnSubLinks_context context; + ListCell *lc; context.for_execute = true; @@ -441,6 +442,23 @@ rewriteRuleAction(Query *parsetree, ChangeVarNodes(rule_qual, PRS2_OLD_VARNO + rt_length, rt_index, 0); + /* + * Mark any subquery RTEs in the rule action as LATERAL if they contain + * Vars referring to the current query level (references to NEW/OLD). + * Those really are lateral references, but we've historically not + * required users to mark such subqueries with LATERAL explicitly. But + * the planner will complain if such Vars exist in a non-LATERAL subquery, + * so we have to fix things up here. 
+ */ + foreach(lc, sub_action->rtable) + { + RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc); + + if (rte->rtekind == RTE_SUBQUERY && !rte->lateral && + contain_vars_of_level((Node *) rte->subquery, 1)) + rte->lateral = true; + } + /* * Generate expanded rtable consisting of main parsetree's rtable plus * rule action's rtable; this becomes the complete rtable for the rule @@ -482,8 +500,6 @@ rewriteRuleAction(Query *parsetree, */ if (parsetree->hasSubLinks && !sub_action->hasSubLinks) { - ListCell *lc; - foreach(lc, parsetree->rtable) { RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc); @@ -585,8 +601,6 @@ rewriteRuleAction(Query *parsetree, */ if (parsetree->cteList != NIL && sub_action->commandType != CMD_UTILITY) { - ListCell *lc; - /* * Annoying implementation restriction: because CTEs are identified by * name within a cteList, we can't merge a CTE from the original query diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index a51729af420..77fbb582cab 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -3256,6 +3256,31 @@ Rules: drop table rule_t1, rule_dest; -- +-- Test implicit LATERAL references to old/new in rules +-- +CREATE TABLE rule_t1(a int, b text DEFAULT 'xxx', c int); +CREATE VIEW rule_v1 AS SELECT * FROM rule_t1; +CREATE RULE v1_ins AS ON INSERT TO rule_v1 + DO ALSO INSERT INTO rule_t1 + SELECT * FROM (SELECT a + 10 FROM rule_t1 WHERE a = NEW.a) tt; +CREATE RULE v1_upd AS ON UPDATE TO rule_v1 + DO ALSO UPDATE rule_t1 t + SET c = tt.a * 10 + FROM (SELECT a FROM rule_t1 WHERE a = OLD.a) tt WHERE t.a = tt.a; +INSERT INTO rule_v1 VALUES (1, 'a'), (2, 'b'); +UPDATE rule_v1 SET b = upper(b); +SELECT * FROM rule_t1; + a | b | c +----+-----+----- + 1 | A | 10 + 2 | B | 20 + 11 | XXX | 110 + 12 | XXX | 120 +(4 rows) + +DROP TABLE rule_t1 CASCADE; +NOTICE: drop cascades to view rule_v1 +-- -- check alter rename rule -- CREATE TABLE rule_t1 (a INT); diff --git a/src/test/regress/sql/rules.sql b/src/test/regress/sql/rules.sql index 1318482d747..90b37af2097 100644 --- a/src/test/regress/sql/rules.sql +++ b/src/test/regress/sql/rules.sql @@ -1065,6 +1065,23 @@ create rule rr as on update to rule_t1 do instead UPDATE rule_dest trgt \d+ rule_t1 drop table rule_t1, rule_dest; +-- +-- Test implicit LATERAL references to old/new in rules +-- +CREATE TABLE rule_t1(a int, b text DEFAULT 'xxx', c int); +CREATE VIEW rule_v1 AS SELECT * FROM rule_t1; +CREATE RULE v1_ins AS ON INSERT TO rule_v1 + DO ALSO INSERT INTO rule_t1 + SELECT * FROM (SELECT a + 10 FROM rule_t1 WHERE a = NEW.a) tt; +CREATE RULE v1_upd AS ON UPDATE TO rule_v1 + DO ALSO UPDATE rule_t1 t + SET c = tt.a * 10 + FROM (SELECT a FROM rule_t1 WHERE a = OLD.a) tt WHERE t.a = tt.a; +INSERT INTO rule_v1 VALUES (1, 'a'), (2, 'b'); +UPDATE rule_v1 SET b = upper(b); +SELECT * FROM rule_t1; +DROP TABLE rule_t1 CASCADE; + -- -- check alter rename rule -- From fcb2564bf1864c10abeab7c134424ea4e46b2900 Mon Sep 17 00:00:00 2001 From: Andrew Dunstan Date: Sun, 26 Feb 2023 06:48:41 -0500 Subject: [PATCH 12/61] Don't force SQL_ASCII/no-locale for installcheck in vcregress.pl It's been this way for a very long time, but it appears to have been masking an issue that only manifests with different settings. Therefore, run the tests in the installation's default encoding/locale. Backpatch to all live branches. 
--- src/tools/msvc/vcregress.pl | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/tools/msvc/vcregress.pl b/src/tools/msvc/vcregress.pl index d4e7d69df2e..885249d08a3 100644 --- a/src/tools/msvc/vcregress.pl +++ b/src/tools/msvc/vcregress.pl @@ -163,9 +163,7 @@ sub installcheck_internal "--bindir=../../../$Config/psql", "--schedule=${schedule}_schedule", "--max-concurrent-tests=20", - "--make-testtablespace-dir", - "--encoding=SQL_ASCII", - "--no-locale"); + "--make-testtablespace-dir"); push(@args, $maxconn) if $maxconn; push(@args, @EXTRA_REGRESS_OPTS); system(@args); From fc9ab81626dd408f33984f49e6364cb788ed5dea Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Mon, 27 Feb 2023 16:29:51 -0500 Subject: [PATCH 13/61] Harden postgres_fdw tests against unexpected cache flushes. postgres_fdw will close its remote session if an sinval cache reset occurs, since it's possible that that means some FDW parameters changed. We had two tests that were trying to ensure that the session remains alive by setting debug_discard_caches = 0; but that's not sufficient. Even though the tests seem stable enough in the buildfarm, they flap a lot under CI. In the first test, which is checking the ability to recover from a lost connection, we can stabilize the results by just not caring whether pg_terminate_backend() finds a victim backend. If a reset did happen, there won't be a session to terminate anymore, but the test can proceed anyway. (Arguably, we are then not testing the unintentional-disconnect case, but as long as that scenario is exercised in most runs I think it's fine; testing the reset-driven case is of value too.) In the second test, which is trying to verify the application_name displayed in pg_stat_activity by a remote session, we had a race condition in that the remote session might go away before we can fetch its pg_stat_activity entry. We can close that race and make the test more certainly test what it intends to by arranging things so that the remote session itself fetches its pg_stat_activity entry (based on PID rather than a somewhat-circular assumption about the application name). Both tests now demonstrably pass under debug_discard_caches = 1, so we can remove that hack. Back-patch into relevant back branches. Discussion: https://postgr.es/m/20230226194340.u44bkfgyz64c67i6@awork3.anarazel.de --- .../postgres_fdw/expected/postgres_fdw.out | 26 ++++++------------- contrib/postgres_fdw/sql/postgres_fdw.sql | 18 ++++++------- 2 files changed, 16 insertions(+), 28 deletions(-) diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out index 67fde96a858..10700d6fd4a 100644 --- a/contrib/postgres_fdw/expected/postgres_fdw.out +++ b/contrib/postgres_fdw/expected/postgres_fdw.out @@ -9730,11 +9730,6 @@ WARNING: there is no transaction in progress -- Change application_name of remote connection to special one -- so that we can easily terminate the connection later. ALTER SERVER loopback OPTIONS (application_name 'fdw_retry_check'); --- If debug_discard_caches is active, it results in --- dropping remote connections after every transaction, making it --- impossible to test termination meaningfully. So turn that off --- for this test. -SET debug_discard_caches = 0; -- Make sure we have a remote connection. SELECT 1 FROM ft1 LIMIT 1; ?column? @@ -9743,13 +9738,12 @@ SELECT 1 FROM ft1 LIMIT 1; (1 row) -- Terminate the remote connection and wait for the termination to complete. 
-SELECT pg_terminate_backend(pid, 180000) FROM pg_stat_activity +-- (If a cache flush happens, the remote connection might have already been +-- dropped; so code this step in a way that doesn't fail if no connection.) +DO $$ BEGIN +PERFORM pg_terminate_backend(pid, 180000) FROM pg_stat_activity WHERE application_name = 'fdw_retry_check'; - pg_terminate_backend ----------------------- - t -(1 row) - +END $$; -- This query should detect the broken connection when starting new remote -- transaction, reestablish new connection, and then succeed. BEGIN; @@ -9762,13 +9756,10 @@ SELECT 1 FROM ft1 LIMIT 1; -- If we detect the broken connection when starting a new remote -- subtransaction, we should fail instead of establishing a new connection. -- Terminate the remote connection and wait for the termination to complete. -SELECT pg_terminate_backend(pid, 180000) FROM pg_stat_activity +DO $$ BEGIN +PERFORM pg_terminate_backend(pid, 180000) FROM pg_stat_activity WHERE application_name = 'fdw_retry_check'; - pg_terminate_backend ----------------------- - t -(1 row) - +END $$; SAVEPOINT s; -- The text of the error might vary across platforms, so only show SQLSTATE. \set VERBOSITY sqlstate @@ -9776,7 +9767,6 @@ SELECT 1 FROM ft1 LIMIT 1; -- should fail ERROR: 08006 \set VERBOSITY default COMMIT; -RESET debug_discard_caches; -- ============================================================================= -- test connection invalidation cases and postgres_fdw_get_connections function -- ============================================================================= diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql index f8c813d2175..793dd64811d 100644 --- a/contrib/postgres_fdw/sql/postgres_fdw.sql +++ b/contrib/postgres_fdw/sql/postgres_fdw.sql @@ -2912,18 +2912,16 @@ ROLLBACK; -- so that we can easily terminate the connection later. ALTER SERVER loopback OPTIONS (application_name 'fdw_retry_check'); --- If debug_discard_caches is active, it results in --- dropping remote connections after every transaction, making it --- impossible to test termination meaningfully. So turn that off --- for this test. -SET debug_discard_caches = 0; - -- Make sure we have a remote connection. SELECT 1 FROM ft1 LIMIT 1; -- Terminate the remote connection and wait for the termination to complete. -SELECT pg_terminate_backend(pid, 180000) FROM pg_stat_activity +-- (If a cache flush happens, the remote connection might have already been +-- dropped; so code this step in a way that doesn't fail if no connection.) +DO $$ BEGIN +PERFORM pg_terminate_backend(pid, 180000) FROM pg_stat_activity WHERE application_name = 'fdw_retry_check'; +END $$; -- This query should detect the broken connection when starting new remote -- transaction, reestablish new connection, and then succeed. @@ -2933,8 +2931,10 @@ SELECT 1 FROM ft1 LIMIT 1; -- If we detect the broken connection when starting a new remote -- subtransaction, we should fail instead of establishing a new connection. -- Terminate the remote connection and wait for the termination to complete. -SELECT pg_terminate_backend(pid, 180000) FROM pg_stat_activity +DO $$ BEGIN +PERFORM pg_terminate_backend(pid, 180000) FROM pg_stat_activity WHERE application_name = 'fdw_retry_check'; +END $$; SAVEPOINT s; -- The text of the error might vary across platforms, so only show SQLSTATE. 
\set VERBOSITY sqlstate @@ -2942,8 +2942,6 @@ SELECT 1 FROM ft1 LIMIT 1; -- should fail \set VERBOSITY default COMMIT; -RESET debug_discard_caches; - -- ============================================================================= -- test connection invalidation cases and postgres_fdw_get_connections function -- ============================================================================= From aa797f5a2a77b938f56f57007e8b455293288004 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Wed, 1 Mar 2023 11:30:17 -0500 Subject: [PATCH 14/61] Avoid fetching one past the end of translate()'s "to" parameter. This is usually harmless, but if you were very unlucky it could provoke a segfault due to the "to" string being right up against the end of memory. Found via valgrind testing (so we might've found it earlier, except that our regression tests lacked any exercise of translate()'s deletion feature). Fix by switching the order of the test-for-end-of-string and advance-pointer steps. While here, compute "to_ptr + tolen" just once. (Smarter compilers might figure that out for themselves, but let's just make sure.) Report and fix by Daniil Anisimov, in bug #17816. Discussion: https://postgr.es/m/17816-70f3d2764e88a108@postgresql.org --- src/backend/utils/adt/oracle_compat.c | 12 +++++++----- src/test/regress/expected/strings.out | 6 ++++++ src/test/regress/sql/strings.sql | 1 + 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/src/backend/utils/adt/oracle_compat.c b/src/backend/utils/adt/oracle_compat.c index f737aa6fbde..bd9e5f9e243 100644 --- a/src/backend/utils/adt/oracle_compat.c +++ b/src/backend/utils/adt/oracle_compat.c @@ -797,7 +797,8 @@ translate(PG_FUNCTION_ARGS) text *to = PG_GETARG_TEXT_PP(2); text *result; char *from_ptr, - *to_ptr; + *to_ptr, + *to_end; char *source, *target; int m, @@ -819,6 +820,7 @@ translate(PG_FUNCTION_ARGS) from_ptr = VARDATA_ANY(from); tolen = VARSIZE_ANY_EXHDR(to); to_ptr = VARDATA_ANY(to); + to_end = to_ptr + tolen; /* * The worst-case expansion is to substitute a max-length character for a @@ -852,16 +854,16 @@ translate(PG_FUNCTION_ARGS) } if (i < fromlen) { - /* substitute */ + /* substitute, or delete if no corresponding "to" character */ char *p = to_ptr; for (i = 0; i < from_index; i++) { - p += pg_mblen(p); - if (p >= (to_ptr + tolen)) + if (p >= to_end) break; + p += pg_mblen(p); } - if (p < (to_ptr + tolen)) + if (p < to_end) { len = pg_mblen(p); memcpy(target, p, len); diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out index 1745ca9ca68..d1485df7063 100644 --- a/src/test/regress/expected/strings.out +++ b/src/test/regress/expected/strings.out @@ -2256,6 +2256,12 @@ SELECT translate('12345', '14', 'ax'); a23x5 (1 row) +SELECT translate('12345', '134', 'a'); + translate +----------- + a25 +(1 row) + SELECT ascii('x'); ascii ------- diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql index c53727f68d3..3c438a304ac 100644 --- a/src/test/regress/sql/strings.sql +++ b/src/test/regress/sql/strings.sql @@ -770,6 +770,7 @@ SELECT ltrim('zzzytrim', 'xyz'); SELECT translate('', '14', 'ax'); SELECT translate('12345', '14', 'ax'); +SELECT translate('12345', '134', 'a'); SELECT ascii('x'); SELECT ascii(''); From 4a37abbf282daf26ff70857a92714a69a500ca07 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Thu, 2 Mar 2023 14:03:21 +0900 Subject: [PATCH 15/61] pageinspect: Fix crash with gist_page_items() Attempting to use this function with a raw page not coming from a GiST index would 
cause a crash, as it was missing the same sanity checks as
gist_page_items_bytea().  This slightly refactors the code so that all
the basic validation checks for GiST pages are done in a single
routine, in the same fashion as the pageinspect functions for hash and
BRIN.  This fixes an issue similar to 076f4d9.  A test is added to
stress this case.  While on it, I have added a similar test for
brin_page_items() with a combination made of a valid BRIN index and a
raw btree page.  This one was already protected, but it was not tested.

Reported-by: Egor Chindyaskin
Author: Dmitry Koval
Discussion: https://postgr.es/m/17815-fc4a2d3b74705703@postgresql.org
Backpatch-through: 14
---
 contrib/pageinspect/expected/brin.out |  8 ++-
 contrib/pageinspect/expected/gist.out | 10 ++--
 contrib/pageinspect/gistfuncs.c       | 82 +++++++++++++--------------
 contrib/pageinspect/sql/brin.sql      |  8 ++-
 contrib/pageinspect/sql/gist.sql      | 10 ++--
 5 files changed, 62 insertions(+), 56 deletions(-)

diff --git a/contrib/pageinspect/expected/brin.out b/contrib/pageinspect/expected/brin.out
index d19cdc3b957..e12fbeb4774 100644
--- a/contrib/pageinspect/expected/brin.out
+++ b/contrib/pageinspect/expected/brin.out
@@ -48,12 +48,14 @@ SELECT * FROM brin_page_items(get_raw_page('test1_a_idx', 2), 'test1_a_idx')
  1 |      0 |      1 | f        | f        | f           | {1 .. 1}
 (1 row)
 
--- Failure for non-BRIN index.
+-- Mask DETAIL messages as these are not portable across architectures.
+\set VERBOSITY terse
+-- Failures for non-BRIN index.
 CREATE INDEX test1_a_btree ON test1 (a);
 SELECT brin_page_items(get_raw_page('test1_a_btree', 0), 'test1_a_btree');
 ERROR:  "test1_a_btree" is not a BRIN index
--- Mask DETAIL messages as these are not portable across architectures.
-\set VERBOSITY terse
+SELECT brin_page_items(get_raw_page('test1_a_btree', 0), 'test1_a_idx');
+ERROR:  input page is not a valid BRIN page
 -- Invalid special area size
 SELECT brin_page_type(get_raw_page('test1', 0));
 ERROR:  input page is not a valid BRIN page
diff --git a/contrib/pageinspect/expected/gist.out b/contrib/pageinspect/expected/gist.out
index eec1fd91cb9..cae739219bd 100644
--- a/contrib/pageinspect/expected/gist.out
+++ b/contrib/pageinspect/expected/gist.out
@@ -56,14 +56,16 @@ SELECT itemoffset, ctid, itemlen FROM gist_page_items_bytea(get_raw_page('test_g
          2 | (2,65535) |      40
 (2 rows)
 
--- Failure with non-GiST index.
+-- Suppress the DETAIL message, to allow the tests to work across various
+-- page sizes and architectures.
+\set VERBOSITY terse
+-- Failures with non-GiST index.
 CREATE INDEX test_gist_btree on test_gist(t);
 SELECT gist_page_items(get_raw_page('test_gist_btree', 0), 'test_gist_btree');
 ERROR:  "test_gist_btree" is not a GiST index
+SELECT gist_page_items(get_raw_page('test_gist_btree', 0), 'test_gist_idx');
+ERROR:  input page is not a valid GiST page
 -- Failure with various modes.
--- Suppress the DETAIL message, to allow the tests to work across various
--- page sizes and architectures. 
-\set VERBOSITY terse -- invalid page size SELECT gist_page_items_bytea('aaa'::bytea); ERROR: invalid page size diff --git a/contrib/pageinspect/gistfuncs.c b/contrib/pageinspect/gistfuncs.c index d1c3c321f83..0ae8f7459c1 100644 --- a/contrib/pageinspect/gistfuncs.c +++ b/contrib/pageinspect/gistfuncs.c @@ -34,29 +34,20 @@ PG_FUNCTION_INFO_V1(gist_page_items_bytea); #define ItemPointerGetDatum(X) PointerGetDatum(X) -Datum -gist_page_opaque_info(PG_FUNCTION_ARGS) +static Page verify_gist_page(bytea *raw_page); + +/* + * Verify that the given bytea contains a GIST page or die in the attempt. + * A pointer to the page is returned. + */ +static Page +verify_gist_page(bytea *raw_page) { - bytea *raw_page = PG_GETARG_BYTEA_P(0); - TupleDesc tupdesc; - Page page; + Page page = get_page_from_raw(raw_page); GISTPageOpaque opaq; - HeapTuple resultTuple; - Datum values[4]; - bool nulls[4]; - Datum flags[16]; - int nflags = 0; - uint16 flagbits; - - if (!superuser()) - ereport(ERROR, - (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), - errmsg("must be superuser to use raw page functions"))); - - page = get_page_from_raw(raw_page); if (PageIsNew(page)) - PG_RETURN_NULL(); + return page; /* verify the special space has the expected size */ if (PageGetSpecialSize(page) != MAXALIGN(sizeof(GISTPageOpaqueData))) @@ -76,12 +67,38 @@ gist_page_opaque_info(PG_FUNCTION_ARGS) GIST_PAGE_ID, opaq->gist_page_id))); + return page; +} + +Datum +gist_page_opaque_info(PG_FUNCTION_ARGS) +{ + bytea *raw_page = PG_GETARG_BYTEA_P(0); + TupleDesc tupdesc; + Page page; + HeapTuple resultTuple; + Datum values[4]; + bool nulls[4]; + Datum flags[16]; + int nflags = 0; + uint16 flagbits; + + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to use raw page functions"))); + + page = verify_gist_page(raw_page); + + if (PageIsNew(page)) + PG_RETURN_NULL(); + /* Build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "return type must be a row type"); /* Convert the flags bitmask to an array of human-readable names */ - flagbits = opaq->flags; + flagbits = GistPageGetOpaque(page)->flags; if (flagbits & F_LEAF) flags[nflags++] = CStringGetTextDatum("leaf"); if (flagbits & F_DELETED) @@ -103,7 +120,7 @@ gist_page_opaque_info(PG_FUNCTION_ARGS) values[0] = LSNGetDatum(PageGetLSN(page)); values[1] = LSNGetDatum(GistPageGetNSN(page)); - values[2] = Int64GetDatum(opaq->rightlink); + values[2] = Int64GetDatum(GistPageGetOpaque(page)->rightlink); values[3] = PointerGetDatum(construct_array(flags, nflags, TEXTOID, -1, false, TYPALIGN_INT)); @@ -124,7 +141,6 @@ gist_page_items_bytea(PG_FUNCTION_ARGS) Tuplestorestate *tupstore; MemoryContext oldcontext; Page page; - GISTPageOpaque opaq; OffsetNumber offset; OffsetNumber maxoff = InvalidOffsetNumber; @@ -157,29 +173,11 @@ gist_page_items_bytea(PG_FUNCTION_ARGS) MemoryContextSwitchTo(oldcontext); - page = get_page_from_raw(raw_page); + page = verify_gist_page(raw_page); if (PageIsNew(page)) PG_RETURN_NULL(); - /* verify the special space has the expected size */ - if (PageGetSpecialSize(page) != MAXALIGN(sizeof(GISTPageOpaqueData))) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("input page is not a valid %s page", "GiST"), - errdetail("Expected special size %d, got %d.", - (int) MAXALIGN(sizeof(GISTPageOpaqueData)), - (int) PageGetSpecialSize(page)))); - - opaq = (GISTPageOpaque) PageGetSpecialPointer(page); - if (opaq->gist_page_id != 
GIST_PAGE_ID) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("input page is not a valid %s page", "GiST"), - errdetail("Expected %08x, got %08x.", - GIST_PAGE_ID, - opaq->gist_page_id))); - /* Avoid bogus PageGetMaxOffsetNumber() call with deleted pages */ if (GistPageIsDeleted(page)) elog(NOTICE, "page is deleted"); @@ -276,7 +274,7 @@ gist_page_items(PG_FUNCTION_ARGS) errmsg("\"%s\" is not a %s index", RelationGetRelationName(indexRel), "GiST"))); - page = get_page_from_raw(raw_page); + page = verify_gist_page(raw_page); if (PageIsNew(page)) { diff --git a/contrib/pageinspect/sql/brin.sql b/contrib/pageinspect/sql/brin.sql index 45098c1ef5e..96b4645187e 100644 --- a/contrib/pageinspect/sql/brin.sql +++ b/contrib/pageinspect/sql/brin.sql @@ -15,12 +15,14 @@ SELECT * FROM brin_revmap_data(get_raw_page('test1_a_idx', 1)) LIMIT 5; SELECT * FROM brin_page_items(get_raw_page('test1_a_idx', 2), 'test1_a_idx') ORDER BY blknum, attnum LIMIT 5; --- Failure for non-BRIN index. +-- Mask DETAIL messages as these are not portable across architectures. +\set VERBOSITY terse + +-- Failures for non-BRIN index. CREATE INDEX test1_a_btree ON test1 (a); SELECT brin_page_items(get_raw_page('test1_a_btree', 0), 'test1_a_btree'); +SELECT brin_page_items(get_raw_page('test1_a_btree', 0), 'test1_a_idx'); --- Mask DETAIL messages as these are not portable across architectures. -\set VERBOSITY terse -- Invalid special area size SELECT brin_page_type(get_raw_page('test1', 0)); SELECT * FROM brin_metapage_info(get_raw_page('test1', 0)); diff --git a/contrib/pageinspect/sql/gist.sql b/contrib/pageinspect/sql/gist.sql index ee46e09053e..963d5d40a3c 100644 --- a/contrib/pageinspect/sql/gist.sql +++ b/contrib/pageinspect/sql/gist.sql @@ -26,14 +26,16 @@ SELECT * FROM gist_page_items(get_raw_page('test_gist_idx', 1), 'test_gist_idx') -- platform-dependent (endianess), so omit the actual key data from the output. SELECT itemoffset, ctid, itemlen FROM gist_page_items_bytea(get_raw_page('test_gist_idx', 0)); --- Failure with non-GiST index. +-- Suppress the DETAIL message, to allow the tests to work across various +-- page sizes and architectures. +\set VERBOSITY terse + +-- Failures with non-GiST index. CREATE INDEX test_gist_btree on test_gist(t); SELECT gist_page_items(get_raw_page('test_gist_btree', 0), 'test_gist_btree'); +SELECT gist_page_items(get_raw_page('test_gist_btree', 0), 'test_gist_idx'); -- Failure with various modes. --- Suppress the DETAIL message, to allow the tests to work across various --- page sizes and architectures. -\set VERBOSITY terse -- invalid page size SELECT gist_page_items_bytea('aaa'::bytea); SELECT gist_page_items('aaa'::bytea, 'test_gist_idx'::regclass); From c31e1165a799788d43939d2ef3f1b707a578cd76 Mon Sep 17 00:00:00 2001 From: Thomas Munro Date: Mon, 6 Mar 2023 15:07:15 +1300 Subject: [PATCH 16/61] Fix assert failures in parallel SERIALIZABLE READ ONLY. 1. Make sure that we don't decrement SxactGlobalXminCount twice when the SXACT_FLAG_RO_SAFE optimization is reached in a parallel query. This could trigger a sanity check failure in assert builds. Non-assert builds recompute the count in SetNewSxactGlobalXmin(), so the problem was hidden, explaining the lack of field reports. Add a new isolation test to exercise that case. 2. Remove an assertion that the DOOMED flag can't be set on a partially released SERIALIZABLEXACT. 
Instead, ignore the flag (our transaction was already determined to be read-only safe, and DOOMED is in fact set during partial release, and there was already an assertion that it wasn't set sooner). Improve an existing isolation test so that it reaches that case (previously it wasn't quite testing what it was supposed to be testing; see discussion). Back-patch to 12. Bug #17116. Defects in commit 47a338cf. Reported-by: Alexander Lakhin Discussion: https://postgr.es/m/17116-d6ca217acc180e30%40postgresql.org --- src/backend/storage/lmgr/predicate.c | 20 +++- .../expected/serializable-parallel-2.out | 57 +++-------- .../expected/serializable-parallel-3.out | 97 +++++++++++++++++++ src/test/isolation/isolation_schedule | 1 + .../specs/serializable-parallel-2.spec | 12 ++- .../specs/serializable-parallel-3.spec | 47 +++++++++ 6 files changed, 184 insertions(+), 50 deletions(-) create mode 100644 src/test/isolation/expected/serializable-parallel-3.out create mode 100644 src/test/isolation/specs/serializable-parallel-3.spec diff --git a/src/backend/storage/lmgr/predicate.c b/src/backend/storage/lmgr/predicate.c index f5668bdb4ff..3b0daf723b6 100644 --- a/src/backend/storage/lmgr/predicate.c +++ b/src/backend/storage/lmgr/predicate.c @@ -3331,6 +3331,7 @@ SetNewSxactGlobalXmin(void) void ReleasePredicateLocks(bool isCommit, bool isReadOnlySafe) { + bool partiallyReleasing = false; bool needToClear; RWConflict conflict, nextConflict, @@ -3431,6 +3432,7 @@ ReleasePredicateLocks(bool isCommit, bool isReadOnlySafe) else { MySerializableXact->flags |= SXACT_FLAG_PARTIALLY_RELEASED; + partiallyReleasing = true; /* ... and proceed to perform the partial release below. */ } } @@ -3681,9 +3683,15 @@ ReleasePredicateLocks(bool isCommit, bool isReadOnlySafe) * serializable transactions completes. We then find the "new oldest" * xmin and purge any transactions which finished before this transaction * was launched. + * + * For parallel queries in read-only transactions, it might run twice. + * We only release the reference on the first call. */ needToClear = false; - if (TransactionIdEquals(MySerializableXact->xmin, PredXact->SxactGlobalXmin)) + if ((partiallyReleasing || + !SxactIsPartiallyReleased(MySerializableXact)) && + TransactionIdEquals(MySerializableXact->xmin, + PredXact->SxactGlobalXmin)) { Assert(PredXact->SxactGlobalXminCount > 0); if (--(PredXact->SxactGlobalXminCount) == 0) @@ -4839,10 +4847,14 @@ PreCommit_CheckForSerializationFailure(void) LWLockAcquire(SerializableXactHashLock, LW_EXCLUSIVE); - /* Check if someone else has already decided that we need to die */ - if (SxactIsDoomed(MySerializableXact)) + /* + * Check if someone else has already decided that we need to die. Since + * we set our own DOOMED flag when partially releasing, ignore in that + * case. 
+ */ + if (SxactIsDoomed(MySerializableXact) && + !SxactIsPartiallyReleased(MySerializableXact)) { - Assert(!SxactIsPartiallyReleased(MySerializableXact)); LWLockRelease(SerializableXactHashLock); ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), diff --git a/src/test/isolation/expected/serializable-parallel-2.out b/src/test/isolation/expected/serializable-parallel-2.out index 92753ccf39f..904fdd90806 100644 --- a/src/test/isolation/expected/serializable-parallel-2.out +++ b/src/test/isolation/expected/serializable-parallel-2.out @@ -1,50 +1,23 @@ Parsed test spec with 2 sessions starting permutation: s1r s2r1 s1c s2r2 s2c -step s1r: SELECT * FROM foo; - a --- - 1 - 2 - 3 - 4 - 5 - 6 - 7 - 8 - 9 -10 -(10 rows) +step s1r: SELECT COUNT(*) FROM foo; +count +----- + 100 +(1 row) -step s2r1: SELECT * FROM foo; - a --- - 1 - 2 - 3 - 4 - 5 - 6 - 7 - 8 - 9 -10 -(10 rows) +step s2r1: SELECT COUNT(*) FROM foo; +count +----- + 100 +(1 row) step s1c: COMMIT; -step s2r2: SELECT * FROM foo; - a --- - 1 - 2 - 3 - 4 - 5 - 6 - 7 - 8 - 9 -10 -(10 rows) +step s2r2: SELECT COUNT(*) FROM foo; +count +----- + 100 +(1 row) step s2c: COMMIT; diff --git a/src/test/isolation/expected/serializable-parallel-3.out b/src/test/isolation/expected/serializable-parallel-3.out new file mode 100644 index 00000000000..654276a3856 --- /dev/null +++ b/src/test/isolation/expected/serializable-parallel-3.out @@ -0,0 +1,97 @@ +Parsed test spec with 4 sessions + +starting permutation: s1r s3r s2r1 s4r1 s1c s2r2 s3c s4r2 s4c s2c +step s1r: SELECT * FROM foo; + a +-- + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 +10 +(10 rows) + +step s3r: SELECT * FROM foo; + a +-- + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 +10 +(10 rows) + +step s2r1: SELECT * FROM foo; + a +-- + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 +10 +(10 rows) + +step s4r1: SELECT * FROM foo; + a +-- + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 +10 +(10 rows) + +step s1c: COMMIT; +step s2r2: SELECT * FROM foo; + a +-- + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 +10 +(10 rows) + +step s3c: COMMIT; +step s4r2: SELECT * FROM foo; + a +-- + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 +10 +(10 rows) + +step s4c: COMMIT; +step s2c: COMMIT; diff --git a/src/test/isolation/isolation_schedule b/src/test/isolation/isolation_schedule index 9122028e15d..3f12f923c02 100644 --- a/src/test/isolation/isolation_schedule +++ b/src/test/isolation/isolation_schedule @@ -146,6 +146,7 @@ test: plpgsql-toast test: truncate-conflict #test: serializable-parallel #test: serializable-parallel-2 +#test: serializable-parallel-3 #test: prepared-transactions diff --git a/src/test/isolation/specs/serializable-parallel-2.spec b/src/test/isolation/specs/serializable-parallel-2.spec index f3941f78631..c975d96d772 100644 --- a/src/test/isolation/specs/serializable-parallel-2.spec +++ b/src/test/isolation/specs/serializable-parallel-2.spec @@ -3,7 +3,8 @@ setup { - CREATE TABLE foo AS SELECT generate_series(1, 10)::int a; + CREATE TABLE foo AS SELECT generate_series(1, 100)::int a; + CREATE INDEX ON foo(a); ALTER TABLE foo SET (parallel_workers = 2); } @@ -14,7 +15,7 @@ teardown session s1 setup { BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE; } -step s1r { SELECT * FROM foo; } +step s1r { SELECT COUNT(*) FROM foo; } step s1c { COMMIT; } session s2 @@ -22,9 +23,12 @@ setup { BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE READ ONLY; SET parallel_setup_cost = 0; SET parallel_tuple_cost = 0; + SET min_parallel_index_scan_size = 0; + SET parallel_leader_participation = off; + SET enable_seqscan = off; } -step s2r1 { SELECT * FROM foo; } -step s2r2 { 
SELECT * FROM foo; }
+step s2r1 { SELECT COUNT(*) FROM foo; }
+step s2r2 { SELECT COUNT(*) FROM foo; }
 step s2c { COMMIT; }
 
 permutation s1r s2r1 s1c s2r2 s2c
diff --git a/src/test/isolation/specs/serializable-parallel-3.spec b/src/test/isolation/specs/serializable-parallel-3.spec
new file mode 100644
index 00000000000..c27298c24ff
--- /dev/null
+++ b/src/test/isolation/specs/serializable-parallel-3.spec
@@ -0,0 +1,47 @@
+# Exercise the case where a read-only serializable transaction has
+# SXACT_FLAG_RO_SAFE set in a parallel query.  This variant is like
+# two copies of #2 running at the same time, and exercises the case
+# where another transaction has the same xmin, and it is the oldest.
+
+setup
+{
+	CREATE TABLE foo AS SELECT generate_series(1, 10)::int a;
+	ALTER TABLE foo SET (parallel_workers = 2);
+}
+
+teardown
+{
+	DROP TABLE foo;
+}
+
+session s1
+setup { BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE; }
+step s1r { SELECT * FROM foo; }
+step s1c { COMMIT; }
+
+session s2
+setup {
+	BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE READ ONLY;
+	SET parallel_setup_cost = 0;
+	SET parallel_tuple_cost = 0;
+	}
+step s2r1 { SELECT * FROM foo; }
+step s2r2 { SELECT * FROM foo; }
+step s2c { COMMIT; }
+
+session s3
+setup { BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE; }
+step s3r { SELECT * FROM foo; }
+step s3c { COMMIT; }
+
+session s4
+setup {
+	BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE READ ONLY;
+	SET parallel_setup_cost = 0;
+	SET parallel_tuple_cost = 0;
+	}
+step s4r1 { SELECT * FROM foo; }
+step s4r2 { SELECT * FROM foo; }
+step s4c { COMMIT; }
+
+permutation s1r s3r s2r1 s4r1 s1c s2r2 s3c s4r2 s4c s2c

From 617de9776fef27342afb4626fab3d49457cfd5c2 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Tue, 7 Mar 2023 18:21:37 -0500
Subject: [PATCH 17/61] Fix more bugs caused by adding columns to the end of a
 view.

If a view is defined atop another view, and then CREATE OR REPLACE
VIEW is used to add columns to the lower view, then when the upper
view's referencing RTE is expanded by ApplyRetrieveRule we will have
a subquery RTE with fewer eref->colnames than output columns.  This
confuses various code that assumes those lists are always in sync, as
they are in plain parser output.

We have seen such problems before (cf commit d5b760ecb), and now I
think the time has come to do what was speculated about in that
commit: let's make ApplyRetrieveRule synthesize some column names to
preserve the invariant that holds in parser output.  Otherwise we'll
be chasing this class of bugs indefinitely.  Moreover, it appears from
testing that this actually gives us better results in the test case
d5b760ecb added, and likely in other corner cases that we lack
coverage for.

In HEAD, I replaced d5b760ecb's hack to make expandRTE exit early with
an elog(ERROR) call, since the case is now presumably unreachable.
But it seems like changing that in back branches would bring more risk
than benefit, so there I just updated the comment.

Per bug #17811 from Alexander Lakhin.  Back-patch to all supported
branches.
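
For illustration, the problematic pattern looks like this (a sketch
only; the regression tests added below are the authoritative cases):

    CREATE TABLE base (a int);
    CREATE VIEW lower_v AS SELECT a FROM base;
    CREATE VIEW upper_v AS SELECT * FROM lower_v;
    -- widen the lower view; upper_v's stored query still lists one column
    CREATE OR REPLACE VIEW lower_v AS SELECT a, a + 1 AS b FROM base;
    -- expanding upper_v now produces a subquery RTE with more output
    -- columns than eref->colnames entries
    SELECT * FROM upper_v;
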
Discussion: https://postgr.es/m/17811-d31686b78f0dffc9@postgresql.org
---
 .../src/test/regress/expected/alter_table.out | 14 +++---
 src/backend/parser/parse_relation.c           | 17 ++++---
 src/backend/rewrite/rewriteHandler.c          | 16 +++++++
 src/test/regress/expected/alter_table.out     | 45 +++++++++++++++----
 src/test/regress/sql/alter_table.sql          | 20 ++++++++-
 .../expected/alter_table.out                  | 12 ++---
 6 files changed, 96 insertions(+), 28 deletions(-)

diff --git a/contrib/pax_storage/src/test/regress/expected/alter_table.out b/contrib/pax_storage/src/test/regress/expected/alter_table.out
index fdc79c03c82..8d90e20a98d 100644
--- a/contrib/pax_storage/src/test/regress/expected/alter_table.out
+++ b/contrib/pax_storage/src/test/regress/expected/alter_table.out
@@ -2628,20 +2628,20 @@ View definition:
   FROM at_view_1 v1;
 
 explain (verbose, costs off) select * from at_view_2;
-                              QUERY PLAN                               
-----------------------------------------------------------------------
+                            QUERY PLAN                             
+-------------------------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)
-   Output: bt.id, bt.stuff, (to_json(ROW(bt.id, bt.stuff, NULL)))
+   Output: bt.id, bt.stuff, (to_json(ROW(bt.id, bt.stuff, 4)))
    ->  Seq Scan on public.at_base_table bt
-         Output: bt.id, bt.stuff, to_json(ROW(bt.id, bt.stuff, NULL))
+         Output: bt.id, bt.stuff, to_json(ROW(bt.id, bt.stuff, 4))
  Optimizer: Postgres query optimizer
  Settings: constraint_exclusion=partition
 (6 rows)
 
 select * from at_view_2;
- id | stuff  |                   j                    
-----+--------+----------------------------------------
- 23 | skidoo | {"id":23,"stuff":"skidoo","more":null}
+ id | stuff  |                  j                  
+----+--------+-------------------------------------
+ 23 | skidoo | {"id":23,"stuff":"skidoo","more":4}
 (1 row)
 
 drop view at_view_2;
diff --git a/src/backend/parser/parse_relation.c b/src/backend/parser/parse_relation.c
index dfe348c1f40..8e158d63dc1 100644
--- a/src/backend/parser/parse_relation.c
+++ b/src/backend/parser/parse_relation.c
@@ -2944,12 +2944,17 @@ expandRTE(RangeTblEntry *rte, int rtindex, int sublevels_up,
 				Assert(varattno == te->resno);
 
 				/*
-				 * In scenarios where columns have been added to a view
-				 * since the outer query was originally parsed, there can
-				 * be more items in the subquery tlist than the outer
-				 * query expects.  We should ignore such extra column(s)
-				 * --- compare the behavior for composite-returning
-				 * functions, in the RTE_FUNCTION case below.
+				 * In a just-parsed subquery RTE, rte->eref->colnames
+				 * should always have exactly as many entries as the
+				 * subquery has non-junk output columns.  However, if the
+				 * subquery RTE was created by expansion of a view,
+				 * perhaps the subquery tlist could now have more entries
+				 * than existed when the outer query was parsed.  Such
+				 * cases should now be prevented because ApplyRetrieveRule
+				 * will extend the colnames list to match.  But out of
+				 * caution, we'll keep the code like this in the back
+				 * branches: just ignore any columns that lack colnames
+				 * entries. 
*/ if (!aliasp_item) break; diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c index 5fe8136ab9a..3ca8bfff2fb 100644 --- a/src/backend/rewrite/rewriteHandler.c +++ b/src/backend/rewrite/rewriteHandler.c @@ -28,6 +28,7 @@ #include "catalog/dependency.h" #include "catalog/pg_type.h" #include "commands/trigger.h" +#include "executor/executor.h" #include "foreign/fdwapi.h" #include "miscadmin.h" #include "nodes/makefuncs.h" @@ -1801,6 +1802,7 @@ ApplyRetrieveRule(Query *parsetree, RangeTblEntry *rte, *subrte; RowMarkClause *rc; + int numCols; if (list_length(rule->actions) != 1) elog(ERROR, "expected just one rule action"); @@ -1960,6 +1962,20 @@ ApplyRetrieveRule(Query *parsetree, rte->updatedCols = NULL; rte->extraUpdatedCols = NULL; + /* + * Since we allow CREATE OR REPLACE VIEW to add columns to a view, the + * rule_action might emit more columns than we expected when the current + * query was parsed. Various places expect rte->eref->colnames to be + * consistent with the non-junk output columns of the subquery, so patch + * things up if necessary by adding some dummy column names. + */ + numCols = ExecCleanTargetListLength(rule_action->targetList); + while (list_length(rte->eref->colnames) < numCols) + { + rte->eref->colnames = lappend(rte->eref->colnames, + makeString(pstrdup("?column?"))); + } + return parsetree; } diff --git a/src/test/regress/expected/alter_table.out b/src/test/regress/expected/alter_table.out index d29bcc0da6d..cbc578e3586 100644 --- a/src/test/regress/expected/alter_table.out +++ b/src/test/regress/expected/alter_table.out @@ -2644,26 +2644,55 @@ View definition: FROM at_view_1 v1; explain (verbose, costs off) select * from at_view_2; - QUERY PLAN ----------------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) - Output: bt.id, bt.stuff, (to_json(ROW(bt.id, bt.stuff, NULL))) + Output: bt.id, bt.stuff, (to_json(ROW(bt.id, bt.stuff, 4))) -> Seq Scan on public.at_base_table bt - Output: bt.id, bt.stuff, to_json(ROW(bt.id, bt.stuff, NULL)) + Output: bt.id, bt.stuff, to_json(ROW(bt.id, bt.stuff, 4)) Optimizer: Postgres query optimizer Settings: constraint_exclusion=partition (6 rows) select * from at_view_2; - id | stuff | j -----+--------+---------------------------------------- - 23 | skidoo | {"id":23,"stuff":"skidoo","more":null} + id | stuff | j +----+--------+------------------------------------- + 23 | skidoo | {"id":23,"stuff":"skidoo","more":4} (1 row) drop view at_view_2; drop view at_view_1; drop table at_base_table; --- check adding a column not iself requiring a rewrite, together with +-- related case (bug #17811) +begin; +create temp table t1 as select * from int8_tbl; +create temp view v1 as select 1::int8 as q1; +create temp view v2 as select * from v1; +create or replace temp view v1 with (security_barrier = true) + as select * from t1; +create temp table log (q1 int8, q2 int8); +create rule v1_upd_rule as on update to v1 + do also insert into log values (new.*); +update v2 set q1 = q1 + 1 where q1 = 123; +select * from t1; + q1 | q2 +------------------+------------------- + 4567890123456789 | 123 + 4567890123456789 | 4567890123456789 + 4567890123456789 | -4567890123456789 + 124 | 456 + 124 | 4567890123456789 +(5 rows) + +select * from log; + q1 | q2 +-----+------------------ + 124 | 456 + 124 | 4567890123456789 +(2 rows) + +rollback; +-- check adding a column not itself requiring a 
rewrite, together with -- a column requiring a default (bug #16038) -- ensure that rewrites aren't silently optimized away, removing the -- value of the test diff --git a/src/test/regress/sql/alter_table.sql b/src/test/regress/sql/alter_table.sql index 7ddf9f898a8..9da0e5603ea 100644 --- a/src/test/regress/sql/alter_table.sql +++ b/src/test/regress/sql/alter_table.sql @@ -1671,7 +1671,25 @@ drop view at_view_2; drop view at_view_1; drop table at_base_table; --- check adding a column not iself requiring a rewrite, together with +-- related case (bug #17811) +begin; +create temp table t1 as select * from int8_tbl; +create temp view v1 as select 1::int8 as q1; +create temp view v2 as select * from v1; +create or replace temp view v1 with (security_barrier = true) + as select * from t1; + +create temp table log (q1 int8, q2 int8); +create rule v1_upd_rule as on update to v1 + do also insert into log values (new.*); + +update v2 set q1 = q1 + 1 where q1 = 123; + +select * from t1; +select * from log; +rollback; + +-- check adding a column not itself requiring a rewrite, together with -- a column requiring a default (bug #16038) -- ensure that rewrites aren't silently optimized away, removing the diff --git a/src/test/singlenode_regress/expected/alter_table.out b/src/test/singlenode_regress/expected/alter_table.out index 16d6768bbed..f28310d8bce 100644 --- a/src/test/singlenode_regress/expected/alter_table.out +++ b/src/test/singlenode_regress/expected/alter_table.out @@ -2575,18 +2575,18 @@ View definition: FROM at_view_1 v1; explain (verbose, costs off) select * from at_view_2; - QUERY PLAN ----------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------- Seq Scan on public.at_base_table bt - Output: bt.id, bt.stuff, to_json(ROW(bt.id, bt.stuff, NULL)) + Output: bt.id, bt.stuff, to_json(ROW(bt.id, bt.stuff, 4)) Settings: constraint_exclusion = 'partition' Optimizer: Postgres query optimizer (4 rows) select * from at_view_2; - id | stuff | j -----+--------+---------------------------------------- - 23 | skidoo | {"id":23,"stuff":"skidoo","more":null} + id | stuff | j +----+--------+------------------------------------- + 23 | skidoo | {"id":23,"stuff":"skidoo","more":4} (1 row) drop view at_view_2; From a523db9b4bbc2e397cba65860f22c0ee0dd5161e Mon Sep 17 00:00:00 2001 From: Andres Freund Date: Tue, 7 Mar 2023 21:36:49 -0800 Subject: [PATCH 18/61] Fix corruption due to vacuum_defer_cleanup_age underflowing 64bit xids When vacuum_defer_cleanup_age is bigger than the current xid, including the epoch, the subtraction of vacuum_defer_cleanup_age would lead to a wrapped around xid. While that normally is not a problem, the subsequent conversion to a 64bit xid results in a 64bit-xid very far into the future. As that xid is used as a horizon to detect whether rows versions are old enough to be removed, that allows removal of rows that are still visible (i.e. corruption). If vacuum_defer_cleanup_age was never changed from the default, there is no chance of this bug occurring. This bug was introduced in dc7420c2c92. A lesser version of it exists in 12-13, introduced by fb5344c969a, affecting only GiST. The 12-13 version of the issue can, in rare cases, lead to pages in a gist index getting recycled too early, potentially causing index entries to be found multiple times. The fix is fairly simple - don't allow vacuum_defer_cleanup_age to retreat further than FirstNormalTransactionId. 
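
To sketch the failure mode (hypothetical numbers, assuming a freshly
initdb'd cluster whose xid counter is still small):

    SELECT pg_current_xact_id();  -- say, 730 in epoch 0
    ALTER SYSTEM SET vacuum_defer_cleanup_age = 1000000;
    SELECT pg_reload_conf();
    -- horizon computations now subtract 1000000 from xid 730; the
    -- result wraps around in 32bit xid space, and the conversion to a
    -- 64bit xid lands very far in the future, so VACUUM may remove row
    -- versions that are still visible
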
Patches to make similar bugs easier to find, by adding asserts to the 64bit xid infrastructure, have been proposed, but are not suitable for backpatching. Currently there are no tests for vacuum_defer_cleanup_age. A patch introducing infrastructure to make writing a test easier has been posted to the list. Reported-by: Michail Nikolaev Reviewed-by: Matthias van de Meent Author: Andres Freund Discussion: https://postgr.es/m/20230108002923.cyoser3ttmt63bfn@awork3.anarazel.de Backpatch: 12-, but impact/fix is smaller for 12-13 --- src/backend/storage/ipc/procarray.c | 87 ++++++++++++++++++++++++----- 1 file changed, 74 insertions(+), 13 deletions(-) diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c index 3c7701e15b6..208874f793e 100644 --- a/src/backend/storage/ipc/procarray.c +++ b/src/backend/storage/ipc/procarray.c @@ -387,6 +387,9 @@ static inline void ProcArrayEndTransactionInternal(PGPROC *proc, TransactionId l static void ProcArrayGroupClearXid(PGPROC *proc, TransactionId latestXid); static void MaintainLatestCompletedXid(TransactionId latestXid); static void MaintainLatestCompletedXidRecovery(TransactionId latestXid); +static void TransactionIdRetreatSafely(TransactionId *xid, + int retreat_by, + FullTransactionId rel); static inline FullTransactionId FullXidRelativeTo(FullTransactionId rel, TransactionId xid); @@ -1995,17 +1998,35 @@ ComputeXidHorizons(ComputeXidHorizonsResult *h, bool updateGlobalVis) * so guc.c should limit it to no more than the xidStopLimit threshold * in varsup.c. Also note that we intentionally don't apply * vacuum_defer_cleanup_age on standby servers. + * + * Need to use TransactionIdRetreatSafely() instead of open-coding the + * subtraction, to prevent creating an xid before + * FirstNormalTransactionId. */ - h->oldest_considered_running = - TransactionIdRetreatedBy(h->oldest_considered_running, - vacuum_defer_cleanup_age); - h->shared_oldest_nonremovable = - TransactionIdRetreatedBy(h->shared_oldest_nonremovable, - vacuum_defer_cleanup_age); - h->data_oldest_nonremovable = - TransactionIdRetreatedBy(h->data_oldest_nonremovable, - vacuum_defer_cleanup_age); - /* defer doesn't apply to temp relations */ + Assert(TransactionIdPrecedesOrEquals(h->oldest_considered_running, + h->shared_oldest_nonremovable)); + Assert(TransactionIdPrecedesOrEquals(h->shared_oldest_nonremovable, + h->data_oldest_nonremovable)); + + if (vacuum_defer_cleanup_age > 0) + { + TransactionIdRetreatSafely(&h->oldest_considered_running, + vacuum_defer_cleanup_age, + h->latest_completed); + TransactionIdRetreatSafely(&h->shared_oldest_nonremovable, + vacuum_defer_cleanup_age, + h->latest_completed); + TransactionIdRetreatSafely(&h->data_oldest_nonremovable, + vacuum_defer_cleanup_age, + h->latest_completed); + /* defer doesn't apply to temp relations */ + + + Assert(TransactionIdPrecedesOrEquals(h->oldest_considered_running, + h->shared_oldest_nonremovable)); + Assert(TransactionIdPrecedesOrEquals(h->shared_oldest_nonremovable, + h->data_oldest_nonremovable)); + } } /* @@ -3329,8 +3350,10 @@ GetSnapshotData(Snapshot snapshot, DtxContext distributedTransactionContext) oldestfxid = FullXidRelativeTo(latest_completed, oldestxid); /* apply vacuum_defer_cleanup_age */ - def_vis_xid_data = - TransactionIdRetreatedBy(globalxmin, vacuum_defer_cleanup_age); + def_vis_xid_data = globalxmin; + TransactionIdRetreatSafely(&def_vis_xid_data, + vacuum_defer_cleanup_age, + oldestfxid); /* Check whether there's a replication slot requiring an older xmin. 
*/ def_vis_xid_data = @@ -5360,6 +5383,44 @@ GlobalVisCheckRemovableXid(Relation rel, TransactionId xid) return GlobalVisTestIsRemovableXid(state, xid); } +/* + * Safely retract *xid by retreat_by, store the result in *xid. + * + * Need to be careful to prevent *xid from retreating below + * FirstNormalTransactionId during epoch 0. This is important to prevent + * generating xids that cannot be converted to a FullTransactionId without + * wrapping around. + * + * If retreat_by would lead to a too old xid, FirstNormalTransactionId is + * returned instead. + */ +static void +TransactionIdRetreatSafely(TransactionId *xid, int retreat_by, FullTransactionId rel) +{ + TransactionId original_xid = *xid; + FullTransactionId fxid; + uint64 fxid_i; + + Assert(TransactionIdIsNormal(original_xid)); + Assert(retreat_by >= 0); /* relevant GUCs are stored as ints */ + AssertTransactionIdInAllowableRange(original_xid); + + if (retreat_by == 0) + return; + + fxid = FullXidRelativeTo(rel, original_xid); + fxid_i = U64FromFullTransactionId(fxid); + + if ((fxid_i - FirstNormalTransactionId) <= retreat_by) + *xid = FirstNormalTransactionId; + else + { + *xid = TransactionIdRetreatedBy(original_xid, retreat_by); + Assert(TransactionIdIsNormal(*xid)); + Assert(NormalTransactionIdPrecedes(*xid, original_xid)); + } +} + /* * Convert a 32 bit transaction id into 64 bit transaction id, by assuming it * is within MaxTransactionId / 2 of XidFromFullTransactionId(rel). @@ -6681,4 +6742,4 @@ LoopBackendProc(BackendProcCallbackFunction func, void *args) (*func)(proc, args); } LWLockRelease(ProcArrayLock); -} \ No newline at end of file +} From 65416007d93ba8395eb1e85b98dab2bfa22ee394 Mon Sep 17 00:00:00 2001 From: Thomas Munro Date: Thu, 9 Mar 2023 16:33:24 +1300 Subject: [PATCH 19/61] Fix race in SERIALIZABLE READ ONLY. Commit bdaabb9b started skipping doomed transactions when building the list of possible conflicts for SERIALIZABLE READ ONLY. That makes sense, because doomed transactions won't commit, but a couple of subtle things broke: 1. If all uncommitted r/w transactions are doomed, a READ ONLY transaction would arbitrarily not benefit from the safe snapshot optimization. It would not be taken immediately, and yet no other transaction would set SXACT_FLAG_RO_SAFE later. 2. In the same circumstances but with DEFERRABLE, GetSafeSnapshot() would correctly exit its wait loop without sleeping and then take the optimization in non-assert builds, but assert builds would fail a sanity check that SXACT_FLAG_RO_SAFE had been set by another transaction. This is similar to the case for PredXact->WritableSxactCount == 0. We should opt out immediately if our possibleUnsafeConflicts list is empty after filtering. The code to maintain the serializable global xmin is moved down below the new opt out site, because otherwise we'd have to reverse its effects before returning. Back-patch to all supported releases. Bug #17368. 
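
For reference, the wait loop affected by (2) is the one entered by
DEFERRABLE read-only transactions (illustrative session only):

    BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE READ ONLY DEFERRABLE;
    -- GetSafeSnapshot() blocks here until a safe snapshot is available;
    -- with this fix it opts out immediately when every concurrent
    -- read-write serializable transaction is already doomed
    SELECT count(*) FROM pg_class;
    COMMIT;
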
Reported-by: Alexander Lakhin Discussion: https://postgr.es/m/17116-d6ca217acc180e30%40postgresql.org Discussion: https://postgr.es/m/20110707212159.GF76634%40csail.mit.edu --- src/backend/storage/lmgr/predicate.c | 49 ++++++++++++++++++---------- 1 file changed, 31 insertions(+), 18 deletions(-) diff --git a/src/backend/storage/lmgr/predicate.c b/src/backend/storage/lmgr/predicate.c index 3b0daf723b6..ec6f26a72ba 100644 --- a/src/backend/storage/lmgr/predicate.c +++ b/src/backend/storage/lmgr/predicate.c @@ -1846,24 +1846,6 @@ GetSerializableTransactionSnapshotInt(Snapshot snapshot, return snapshot; } - /* Maintain serializable global xmin info. */ - if (!TransactionIdIsValid(PredXact->SxactGlobalXmin)) - { - Assert(PredXact->SxactGlobalXminCount == 0); - PredXact->SxactGlobalXmin = snapshot->xmin; - PredXact->SxactGlobalXminCount = 1; - SerialSetActiveSerXmin(snapshot->xmin); - } - else if (TransactionIdEquals(snapshot->xmin, PredXact->SxactGlobalXmin)) - { - Assert(PredXact->SxactGlobalXminCount > 0); - PredXact->SxactGlobalXminCount++; - } - else - { - Assert(TransactionIdFollows(snapshot->xmin, PredXact->SxactGlobalXmin)); - } - /* Initialize the structure. */ sxact->vxid = vxid; sxact->SeqNo.lastCommitBeforeSnapshot = PredXact->LastSxactCommitSeqNo; @@ -1900,6 +1882,19 @@ GetSerializableTransactionSnapshotInt(Snapshot snapshot, SetPossibleUnsafeConflict(sxact, othersxact); } } + + /* + * If we didn't find any possibly unsafe conflicts because every + * uncommitted writable transaction turned out to be doomed, then we + * can "opt out" immediately. See comments above the earlier check for + * PredXact->WritableSxactCount == 0. + */ + if (SHMQueueEmpty(&sxact->possibleUnsafeConflicts)) + { + ReleasePredXact(sxact); + LWLockRelease(SerializableXactHashLock); + return snapshot; + } } else { @@ -1908,6 +1903,24 @@ GetSerializableTransactionSnapshotInt(Snapshot snapshot, (MaxBackends + max_prepared_xacts)); } + /* Maintain serializable global xmin info. */ + if (!TransactionIdIsValid(PredXact->SxactGlobalXmin)) + { + Assert(PredXact->SxactGlobalXminCount == 0); + PredXact->SxactGlobalXmin = snapshot->xmin; + PredXact->SxactGlobalXminCount = 1; + SerialSetActiveSerXmin(snapshot->xmin); + } + else if (TransactionIdEquals(snapshot->xmin, PredXact->SxactGlobalXmin)) + { + Assert(PredXact->SxactGlobalXminCount > 0); + PredXact->SxactGlobalXminCount++; + } + else + { + Assert(TransactionIdFollows(snapshot->xmin, PredXact->SxactGlobalXmin)); + } + MySerializableXact = sxact; MyXactDidWrite = false; /* haven't written anything yet */ From de3bedc5aa9db42861a8569f4374b3f763e0cfd4 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sat, 11 Mar 2023 12:15:41 -0500 Subject: [PATCH 20/61] Fix misbehavior in contrib/pg_trgm with an unsatisfiable regex. If the regex compiler can see that a regex is unsatisfiable (for example, '$foo') then it may emit an NFA having no arcs. pg_trgm's packGraph function did the wrong thing in this case; it would access off the end of a work array, and with bad luck could produce a corrupted output data structure causing more problems later. This could end with wrong answers or crashes in queries using a pg_trgm GIN or GiST index with such a regex. Fix by not trying to de-duplicate if there aren't at least 2 arcs. Per bug #17830 from Alexander Lakhin. Back-patch to all supported branches. 
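
An unsatisfiable pattern is easy to construct, since '$' only matches
at the end of the string (a sketch mirroring the new regression test;
table and index names are made up):

    CREATE EXTENSION pg_trgm;
    CREATE TABLE docs (t text);
    CREATE INDEX docs_t_trgm ON docs USING gin (t gin_trgm_ops);
    -- '$foo' can never match, so the compiled NFA has no arcs;
    -- packGraph() must cope instead of reading past its work array
    SELECT * FROM docs WHERE t ~ '$foo';
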
Discussion: https://postgr.es/m/17830-57ff5f89bdb02b09@postgresql.org --- contrib/pg_trgm/expected/pg_word_trgm.out | 6 ++++++ contrib/pg_trgm/sql/pg_word_trgm.sql | 3 +++ contrib/pg_trgm/trgm_regexp.c | 26 ++++++++++++++--------- 3 files changed, 25 insertions(+), 10 deletions(-) diff --git a/contrib/pg_trgm/expected/pg_word_trgm.out b/contrib/pg_trgm/expected/pg_word_trgm.out index 9f0ca502a6d..4c6b49934b6 100644 --- a/contrib/pg_trgm/expected/pg_word_trgm.out +++ b/contrib/pg_trgm/expected/pg_word_trgm.out @@ -1048,3 +1048,9 @@ select t,word_similarity('Kabankala',t) as sml from test_trgm2 where t %> 'Kaban Waikala | 0.3 (89 rows) +-- test unsatisfiable pattern +select * from test_trgm2 where t ~ '.*$x'; + t +--- +(0 rows) + diff --git a/contrib/pg_trgm/sql/pg_word_trgm.sql b/contrib/pg_trgm/sql/pg_word_trgm.sql index d9fa1c55e5e..d2ada49133a 100644 --- a/contrib/pg_trgm/sql/pg_word_trgm.sql +++ b/contrib/pg_trgm/sql/pg_word_trgm.sql @@ -43,3 +43,6 @@ select t,word_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <% t select t,word_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <% t order by sml desc, t; select t,word_similarity('Baykal',t) as sml from test_trgm2 where t %> 'Baykal' order by sml desc, t; select t,word_similarity('Kabankala',t) as sml from test_trgm2 where t %> 'Kabankala' order by sml desc, t; + +-- test unsatisfiable pattern +select * from test_trgm2 where t ~ '.*$x'; diff --git a/contrib/pg_trgm/trgm_regexp.c b/contrib/pg_trgm/trgm_regexp.c index 71e4ebee4e9..3485a725cde 100644 --- a/contrib/pg_trgm/trgm_regexp.c +++ b/contrib/pg_trgm/trgm_regexp.c @@ -1944,9 +1944,7 @@ packGraph(TrgmNFA *trgmNFA, MemoryContext rcontext) arcsCount; HASH_SEQ_STATUS scan_status; TrgmState *state; - TrgmPackArcInfo *arcs, - *p1, - *p2; + TrgmPackArcInfo *arcs; TrgmPackedArc *packedArcs; TrgmPackedGraph *result; int i, @@ -2018,17 +2016,25 @@ packGraph(TrgmNFA *trgmNFA, MemoryContext rcontext) qsort(arcs, arcIndex, sizeof(TrgmPackArcInfo), packArcInfoCmp); /* We could have duplicates because states were merged. Remove them. */ - /* p1 is probe point, p2 is last known non-duplicate. */ - p2 = arcs; - for (p1 = arcs + 1; p1 < arcs + arcIndex; p1++) + if (arcIndex > 1) { - if (packArcInfoCmp(p1, p2) > 0) + /* p1 is probe point, p2 is last known non-duplicate. */ + TrgmPackArcInfo *p1, + *p2; + + p2 = arcs; + for (p1 = arcs + 1; p1 < arcs + arcIndex; p1++) { - p2++; - *p2 = *p1; + if (packArcInfoCmp(p1, p2) > 0) + { + p2++; + *p2 = *p1; + } } + arcsCount = (p2 - arcs) + 1; } - arcsCount = (p2 - arcs) + 1; + else + arcsCount = arcIndex; /* Create packed representation */ result = (TrgmPackedGraph *) From 1b0fd4024745246123b76c273240fbf5aabc09f5 Mon Sep 17 00:00:00 2001 From: Andres Freund Date: Sat, 11 Mar 2023 14:12:51 -0800 Subject: [PATCH 21/61] amcheck: Fix ordering bug in update_cached_xid_range() The initialization order in update_cached_xid_range() was wrong, calling FullTransactionIdFromXidAndCtx() before setting ->next_xid. FullTransactionIdFromXidAndCtx() uses ->next_xid. In most situations this will not cause visible issues, because the next call to update_cached_xid_range() will use a less wrong ->next_xid. It's rare that xids advance fast enough for this to be a problem. Found while adding more asserts to the 64bit xid infrastructure. 
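
For reference, the affected code is reached through amcheck's heap
checking entry point (illustrative; the relation name is made up):

    CREATE EXTENSION amcheck;
    -- verify_heapam() refreshes the cached xid range via
    -- update_cached_xid_range() while scanning the relation
    SELECT * FROM verify_heapam('some_table');
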
Reviewed-by: Mark Dilger
Discussion: https://postgr.es/m/20230108002923.cyoser3ttmt63bfn@awork3.anarazel.de
Backpatch: 14-, where heapam verification was introduced
---
 contrib/amcheck/verify_heapam.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/contrib/amcheck/verify_heapam.c b/contrib/amcheck/verify_heapam.c
index 9abeca607d7..6614b609e83 100644
--- a/contrib/amcheck/verify_heapam.c
+++ b/contrib/amcheck/verify_heapam.c
@@ -1579,6 +1579,9 @@ FullTransactionIdFromXidAndCtx(TransactionId xid, const HeapCheckContext *ctx)
 {
 	uint32		epoch;
 
+	Assert(TransactionIdIsNormal(ctx->next_xid));
+	Assert(FullTransactionIdIsNormal(ctx->next_fxid));
+
 	if (!TransactionIdIsNormal(xid))
 		return FullTransactionIdFromEpochAndXid(0, xid);
 	epoch = EpochFromFullTransactionId(ctx->next_fxid);
@@ -1600,8 +1603,8 @@ update_cached_xid_range(HeapCheckContext *ctx)
 	LWLockRelease(XidGenLock);
 
 	/* And compute alternate versions of the same */
-	ctx->oldest_fxid = FullTransactionIdFromXidAndCtx(ctx->oldest_xid, ctx);
 	ctx->next_xid = XidFromFullTransactionId(ctx->next_fxid);
+	ctx->oldest_fxid = FullTransactionIdFromXidAndCtx(ctx->oldest_xid, ctx);
 }
 
 /*

From f6784db9564cd67758e527a598c2bf55fec1ae4c Mon Sep 17 00:00:00 2001
From: Andres Freund
Date: Sat, 11 Mar 2023 14:12:51 -0800
Subject: [PATCH 22/61] amcheck: Fix FullTransactionIdFromXidAndCtx() for xids
 before epoch 0

64bit xids can't represent xids before epoch 0 (see also be504a3e974).
When FullTransactionIdFromXidAndCtx() was passed such an xid, it'd
create a 64bit xid far into the future.  Noticed while adding
assertions in the course of investigating be504a3e974, as amcheck's
tests create such xids.

To fix the issue, just return FirstNormalFullTransactionId in this
case.  A freshly initdb'd cluster already has a newer horizon.  The
most minimal version of this would make the messages for some detected
corruptions differently inaccurate.  To make those cases accurate,
switch FullTransactionIdFromXidAndCtx() to use the 32bit modulo
difference between xid and nextxid to compute the 64bit xid, yielding
sensible "in the future" / "in the past" answers.

Reviewed-by: Mark Dilger
Discussion: https://postgr.es/m/20230108002923.cyoser3ttmt63bfn@awork3.anarazel.de
Backpatch: 14-, where heapam verification was introduced
---
 contrib/amcheck/verify_heapam.c           | 33 +++++++++++++++++++----
 src/bin/pg_amcheck/t/004_verify_heapam.pl | 30 ++++++++++++++-------
 2 files changed, 49 insertions(+), 14 deletions(-)

diff --git a/contrib/amcheck/verify_heapam.c b/contrib/amcheck/verify_heapam.c
index 6614b609e83..f7964b78173 100644
--- a/contrib/amcheck/verify_heapam.c
+++ b/contrib/amcheck/verify_heapam.c
@@ -1577,17 +1577,40 @@ check_tuple(HeapCheckContext *ctx)
 static FullTransactionId
 FullTransactionIdFromXidAndCtx(TransactionId xid, const HeapCheckContext *ctx)
 {
-	uint32		epoch;
+	uint64		nextfxid_i;
+	int32		diff;
+	FullTransactionId fxid;
 
 	Assert(TransactionIdIsNormal(ctx->next_xid));
 	Assert(FullTransactionIdIsNormal(ctx->next_fxid));
+	Assert(XidFromFullTransactionId(ctx->next_fxid) == ctx->next_xid);
 
 	if (!TransactionIdIsNormal(xid))
 		return FullTransactionIdFromEpochAndXid(0, xid);
-	epoch = EpochFromFullTransactionId(ctx->next_fxid);
-	if (xid > ctx->next_xid)
-		epoch--;
-	return FullTransactionIdFromEpochAndXid(epoch, xid);
+
+	nextfxid_i = U64FromFullTransactionId(ctx->next_fxid);
+
+	/* compute the 32bit modulo difference */
+	diff = (int32) (ctx->next_xid - xid);
+
+	/*
+	 * In cases of corruption we might see a 32bit xid that is before epoch
+	 * 0. 
We can't represent that as a 64bit xid, due to 64bit xids being + * unsigned integers, without the modulo arithmetic of 32bit xid. There's + * no really nice way to deal with that, but it works ok enough to use + * FirstNormalFullTransactionId in that case, as a freshly initdb'd + * cluster already has a newer horizon. + */ + if (diff > 0 && (nextfxid_i - FirstNormalTransactionId) < (int64) diff) + { + Assert(EpochFromFullTransactionId(ctx->next_fxid) == 0); + fxid = FirstNormalFullTransactionId; + } + else + fxid = FullTransactionIdFromU64(nextfxid_i - diff); + + Assert(FullTransactionIdIsNormal(fxid)); + return fxid; } /* diff --git a/src/bin/pg_amcheck/t/004_verify_heapam.pl b/src/bin/pg_amcheck/t/004_verify_heapam.pl index b603efad929..4cadb837730 100644 --- a/src/bin/pg_amcheck/t/004_verify_heapam.pl +++ b/src/bin/pg_amcheck/t/004_verify_heapam.pl @@ -217,7 +217,7 @@ sub write_tuple my $relpath = "$pgdata/$rel"; # Insert data and freeze public.test -use constant ROWCOUNT => 16; +use constant ROWCOUNT => 17; $node->safe_psql( 'postgres', qq( INSERT INTO public.test (a, b, c) @@ -296,7 +296,7 @@ sub write_tuple $node->start; # Ok, Xids and page layout look ok. We can run corruption tests. -plan tests => 19; +plan tests => 20; # Check that pg_amcheck runs against the uncorrupted table without error. $node->command_ok( @@ -379,23 +379,24 @@ sub header elsif ($offnum == 3) { # Corruptly set xmin < datfrozenxid, further back, noting circularity - # of xid comparison. For a new cluster with epoch = 0, the corrupt - # xmin will be interpreted as in the future - $tup->{t_xmin} = 4026531839; + # of xid comparison. + my $xmin = 4026531839; + $tup->{t_xmin} = $xmin; $tup->{t_infomask} &= ~HEAP_XMIN_COMMITTED; $tup->{t_infomask} &= ~HEAP_XMIN_INVALID; push @expected, - qr/${$header}xmin 4026531839 equals or exceeds next valid transaction ID 0:\d+/; + qr/${$header}xmin ${xmin} precedes oldest valid transaction ID 0:\d+/; } elsif ($offnum == 4) { # Corruptly set xmax < relminmxid; - $tup->{t_xmax} = 4026531839; + my $xmax = 4026531839; + $tup->{t_xmax} = $xmax; $tup->{t_infomask} &= ~HEAP_XMAX_INVALID; push @expected, - qr/${$header}xmax 4026531839 equals or exceeds next valid transaction ID 0:\d+/; + qr/${$header}xmax ${xmax} precedes oldest valid transaction ID 0:\d+/; } elsif ($offnum == 5) { @@ -503,7 +504,7 @@ sub header push @expected, qr/${header}multitransaction ID 4 equals or exceeds next valid multitransaction ID 1/; } - elsif ($offnum == 15) # Last offnum must equal ROWCOUNT + elsif ($offnum == 15) { # Set both HEAP_XMAX_COMMITTED and HEAP_XMAX_IS_MULTI $tup->{t_infomask} |= HEAP_XMAX_COMMITTED; @@ -513,6 +514,17 @@ sub header push @expected, qr/${header}multitransaction ID 4000000000 precedes relation minimum multitransaction ID threshold 1/; } + elsif ($offnum == 16) # Last offnum must equal ROWCOUNT + { + # Corruptly set xmin > next_xid to be in the future. + my $xmin = 123456; + $tup->{t_xmin} = $xmin; + $tup->{t_infomask} &= ~HEAP_XMIN_COMMITTED; + $tup->{t_infomask} &= ~HEAP_XMIN_INVALID; + + push @expected, + qr/${$header}xmin ${xmin} equals or exceeds next valid transaction ID 0:\d+/; + } write_tuple($file, $offset, $tup); } close($file) From 567ccb2fe27cf283072ac44c35c56bf834363744 Mon Sep 17 00:00:00 2001 From: Andrew Dunstan Date: Sun, 12 Mar 2023 09:00:32 -0400 Subject: [PATCH 23/61] Mark unsafe_tests module as not runnable with installcheck This was an omission in the original creation of the module. Also slightly adjust some wording to avoid a double "is". 
Backpatch the non-meson piece of this to release 12, where the module was introduced. Discussion: https://postgr.es/m/be869e1c-8e3f-4cde-8609-212c899cccf9@dunslane.net --- src/test/modules/unsafe_tests/Makefile | 3 +++ src/test/modules/unsafe_tests/README | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/test/modules/unsafe_tests/Makefile b/src/test/modules/unsafe_tests/Makefile index 3ecf5fcfc5b..1d989007bd5 100644 --- a/src/test/modules/unsafe_tests/Makefile +++ b/src/test/modules/unsafe_tests/Makefile @@ -2,6 +2,9 @@ REGRESS = rolenames alter_system_table +# the whole point of these tests is to not run installcheck +NO_INSTALLCHECK = 1 + ifdef USE_PGXS PG_CONFIG = pg_config PGXS := $(shell $(PG_CONFIG) --pgxs) diff --git a/src/test/modules/unsafe_tests/README b/src/test/modules/unsafe_tests/README index a7e5b2a04f5..d9dbd038b95 100644 --- a/src/test/modules/unsafe_tests/README +++ b/src/test/modules/unsafe_tests/README @@ -1,6 +1,6 @@ This directory doesn't actually contain any extension module. -What it is is a home for regression tests that we don't want to run +Instead it is a home for regression tests that we don't want to run during "make installcheck" because they could have side-effects that seem undesirable for a production installation. From f13a802b6ea842cb24caed05a3a66b0963f09af2 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Mon, 13 Mar 2023 16:36:31 +0900 Subject: [PATCH 24/61] Fix inconsistent error handling for GSS encryption in PQconnectPoll() The error cases for TLS and GSS encryption were inconsistent. After TLS fails, the connection is marked as dead and follow-up calls of PQconnectPoll() would return immediately, but GSS encryption was not doing that, so the connection would still have been allowed to enter the GSS handling code. This was handled incorrectly when gssencmode was set to "require". "prefer" was working correctly, and this could not happen under "disable" as GSS encryption would not be attempted. This commit makes the error handling of GSS encryption on par with TLS portion, fixing the case of gssencmode=require. Reported-by: Jacob Champion Author: Michael Paquier Reviewed-by: Jacob Champion, Stephen Frost Discussion: https://postgr.es/m/23787477-5fe1-a161-6d2a-e459f74c4713@timescale.com Backpatch-through: 12 --- src/interfaces/libpq/fe-connect.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/src/interfaces/libpq/fe-connect.c b/src/interfaces/libpq/fe-connect.c index 7f3dfd462a6..46e8540004e 100644 --- a/src/interfaces/libpq/fe-connect.c +++ b/src/interfaces/libpq/fe-connect.c @@ -3323,17 +3323,22 @@ PQconnectPoll(PGconn *conn) conn->status = CONNECTION_MADE; return PGRES_POLLING_WRITING; } - else if (pollres == PGRES_POLLING_FAILED && - conn->gssencmode[0] == 'p') + else if (pollres == PGRES_POLLING_FAILED) { - /* - * We failed, but we can retry on "prefer". Have to drop - * the current connection to do so, though. - */ - conn->try_gss = false; - need_new_connection = true; - goto keep_going; + if (conn->gssencmode[0] == 'p') + { + /* + * We failed, but we can retry on "prefer". Have to + * drop the current connection to do so, though. 
+ */ + conn->try_gss = false; + need_new_connection = true; + goto keep_going; + } + /* Else it's a hard failure */ + goto error_return; } + /* Else, return POLLING_READING or POLLING_WRITING status */ return pollres; #else /* !ENABLE_GSS */ /* unreachable */ From a1490b540a3dbe8097b8e2c4551037b2f8da439c Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Mon, 13 Mar 2023 15:19:00 -0400 Subject: [PATCH 25/61] Fix JSON error reporting for many cases of erroneous string values. The majority of error exit cases in json_lex_string() failed to set lex->token_terminator, causing problems for the error context reporting code: it would see token_terminator less than token_start and do something more or less nuts. In v14 and up the end result could be as bad as a crash in report_json_context(). Older versions accidentally avoided that fate; but all versions produce error context lines that are far less useful than intended, because they'd stop at the end of the prior token instead of continuing to where the actually-bad input is. To fix, invent some macros that make it less notationally painful to do the right thing. Also add documentation about what the function is actually required to do; and in >= v14, add an assertion in report_json_context about token_terminator being sufficiently far advanced. Per report from Nikolay Shaplov. Back-patch to all supported versions. Discussion: https://postgr.es/m/7332649.x5DLKWyVIX@thinkpad-pgpro --- src/backend/utils/adt/jsonfuncs.c | 1 + src/common/jsonapi.c | 77 +++++++++++-------- src/test/regress/expected/json_encoding.out | 24 +++--- src/test/regress/expected/json_encoding_1.out | 24 +++--- 4 files changed, 72 insertions(+), 54 deletions(-) diff --git a/src/backend/utils/adt/jsonfuncs.c b/src/backend/utils/adt/jsonfuncs.c index b342c81f27b..f6a074aa7d0 100644 --- a/src/backend/utils/adt/jsonfuncs.c +++ b/src/backend/utils/adt/jsonfuncs.c @@ -656,6 +656,7 @@ report_json_context(JsonLexContext *lex) line_start = lex->line_start; context_start = line_start; context_end = lex->token_terminator; + Assert(context_end >= context_start); /* Advance until we are close enough to context_end */ while (context_end - context_start >= 50) diff --git a/src/common/jsonapi.c b/src/common/jsonapi.c index ade13aed3a4..3d0fbfa7be1 100644 --- a/src/common/jsonapi.c +++ b/src/common/jsonapi.c @@ -675,6 +675,14 @@ json_lex(JsonLexContext *lex) /* * The next token in the input stream is known to be a string; lex it. + * + * If lex->strval isn't NULL, fill it with the decoded string. + * Set lex->token_terminator to the end of the decoded input, and in + * success cases, transfer its previous value to lex->prev_token_terminator. + * Return JSON_SUCCESS or an error code. + * + * Note: be careful that all error exits advance lex->token_terminator + * to the point after the character we detected the error on. */ static inline JsonParseErrorType json_lex_string(JsonLexContext *lex) @@ -683,6 +691,19 @@ json_lex_string(JsonLexContext *lex) int len; int hi_surrogate = -1; + /* Convenience macros for error exits */ +#define FAIL_AT_CHAR_START(code) \ + do { \ + lex->token_terminator = s; \ + return code; \ + } while (0) +#define FAIL_AT_CHAR_END(code) \ + do { \ + lex->token_terminator = \ + s + pg_encoding_mblen_bounded(lex->input_encoding, s); \ + return code; \ + } while (0) + if (lex->strval != NULL) resetStringInfo(lex->strval); @@ -695,18 +716,14 @@ json_lex_string(JsonLexContext *lex) len++; /* Premature end of the string. 
*/ if (len >= lex->input_length) - { - lex->token_terminator = s; - return JSON_INVALID_TOKEN; - } + FAIL_AT_CHAR_START(JSON_INVALID_TOKEN); else if (*s == '"') break; else if ((unsigned char) *s < 32) { /* Per RFC4627, these characters MUST be escaped. */ /* Since *s isn't printable, exclude it from the context string */ - lex->token_terminator = s; - return JSON_ESCAPING_REQUIRED; + FAIL_AT_CHAR_START(JSON_ESCAPING_REQUIRED); } else if (*s == '\\') { @@ -714,10 +731,7 @@ json_lex_string(JsonLexContext *lex) s++; len++; if (len >= lex->input_length) - { - lex->token_terminator = s; - return JSON_INVALID_TOKEN; - } + FAIL_AT_CHAR_START(JSON_INVALID_TOKEN); else if (*s == 'u') { int i; @@ -728,10 +742,7 @@ json_lex_string(JsonLexContext *lex) s++; len++; if (len >= lex->input_length) - { - lex->token_terminator = s; - return JSON_INVALID_TOKEN; - } + FAIL_AT_CHAR_START(JSON_INVALID_TOKEN); else if (*s >= '0' && *s <= '9') ch = (ch * 16) + (*s - '0'); else if (*s >= 'a' && *s <= 'f') @@ -739,10 +750,7 @@ json_lex_string(JsonLexContext *lex) else if (*s >= 'A' && *s <= 'F') ch = (ch * 16) + (*s - 'A') + 10; else - { - lex->token_terminator = s + pg_encoding_mblen_bounded(lex->input_encoding, s); - return JSON_UNICODE_ESCAPE_FORMAT; - } + FAIL_AT_CHAR_END(JSON_UNICODE_ESCAPE_FORMAT); } if (lex->strval != NULL) { @@ -752,20 +760,20 @@ json_lex_string(JsonLexContext *lex) if (is_utf16_surrogate_first(ch)) { if (hi_surrogate != -1) - return JSON_UNICODE_HIGH_SURROGATE; + FAIL_AT_CHAR_END(JSON_UNICODE_HIGH_SURROGATE); hi_surrogate = ch; continue; } else if (is_utf16_surrogate_second(ch)) { if (hi_surrogate == -1) - return JSON_UNICODE_LOW_SURROGATE; + FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE); ch = surrogate_pair_to_codepoint(hi_surrogate, ch); hi_surrogate = -1; } if (hi_surrogate != -1) - return JSON_UNICODE_LOW_SURROGATE; + FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE); /* * Reject invalid cases. We can't have a value above @@ -775,7 +783,7 @@ json_lex_string(JsonLexContext *lex) if (ch == 0) { /* We can't allow this, since our TEXT type doesn't */ - return JSON_UNICODE_CODE_POINT_ZERO; + FAIL_AT_CHAR_END(JSON_UNICODE_CODE_POINT_ZERO); } /* @@ -812,14 +820,14 @@ json_lex_string(JsonLexContext *lex) appendStringInfoChar(lex->strval, (char) ch); } else - return JSON_UNICODE_HIGH_ESCAPE; + FAIL_AT_CHAR_END(JSON_UNICODE_HIGH_ESCAPE); #endif /* FRONTEND */ } } else if (lex->strval != NULL) { if (hi_surrogate != -1) - return JSON_UNICODE_LOW_SURROGATE; + FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE); switch (*s) { @@ -844,10 +852,14 @@ json_lex_string(JsonLexContext *lex) appendStringInfoChar(lex->strval, '\t'); break; default: - /* Not a valid string escape, so signal error. */ + + /* + * Not a valid string escape, so signal error. We + * adjust token_start so that just the escape sequence + * is reported, not the whole string. + */ lex->token_start = s; - lex->token_terminator = s + pg_encoding_mblen_bounded(lex->input_encoding, s); - return JSON_ESCAPING_INVALID; + FAIL_AT_CHAR_END(JSON_ESCAPING_INVALID); } } else if (strchr("\"\\/bfnrt", *s) == NULL) @@ -860,15 +872,14 @@ json_lex_string(JsonLexContext *lex) * shown it's not a performance win. 
*/ lex->token_start = s; - lex->token_terminator = s + pg_encoding_mblen_bounded(lex->input_encoding, s); - return JSON_ESCAPING_INVALID; + FAIL_AT_CHAR_END(JSON_ESCAPING_INVALID); } } else if (lex->strval != NULL) { if (hi_surrogate != -1) - return JSON_UNICODE_LOW_SURROGATE; + FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE); appendStringInfoChar(lex->strval, *s); } @@ -876,12 +887,18 @@ json_lex_string(JsonLexContext *lex) } if (hi_surrogate != -1) + { + lex->token_terminator = s + 1; return JSON_UNICODE_LOW_SURROGATE; + } /* Hooray, we found the end of the string! */ lex->prev_token_terminator = lex->token_terminator; lex->token_terminator = s + 1; return JSON_SUCCESS; + +#undef FAIL_AT_CHAR_START +#undef FAIL_AT_CHAR_END } /* diff --git a/src/test/regress/expected/json_encoding.out b/src/test/regress/expected/json_encoding.out index f343f74fe18..fa41b401030 100644 --- a/src/test/regress/expected/json_encoding.out +++ b/src/test/regress/expected/json_encoding.out @@ -56,19 +56,19 @@ select json '{ "a": "\ud83d\ude04\ud83d\udc36" }' -> 'a' as correct_in_utf8; select json '{ "a": "\ud83d\ud83d" }' -> 'a'; -- 2 high surrogates in a row ERROR: invalid input syntax for type json DETAIL: Unicode high surrogate must not follow a high surrogate. -CONTEXT: JSON data, line 1: { "a":... +CONTEXT: JSON data, line 1: { "a": "\ud83d\ud83d... select json '{ "a": "\ude04\ud83d" }' -> 'a'; -- surrogates in wrong order ERROR: invalid input syntax for type json DETAIL: Unicode low surrogate must follow a high surrogate. -CONTEXT: JSON data, line 1: { "a":... +CONTEXT: JSON data, line 1: { "a": "\ude04... select json '{ "a": "\ud83dX" }' -> 'a'; -- orphan high surrogate ERROR: invalid input syntax for type json DETAIL: Unicode low surrogate must follow a high surrogate. -CONTEXT: JSON data, line 1: { "a":... +CONTEXT: JSON data, line 1: { "a": "\ud83dX... select json '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate ERROR: invalid input syntax for type json DETAIL: Unicode low surrogate must follow a high surrogate. -CONTEXT: JSON data, line 1: { "a":... +CONTEXT: JSON data, line 1: { "a": "\ude04... --handling of simple unicode escapes select json '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8; correct_in_utf8 @@ -121,7 +121,7 @@ select json '{ "a": "dollar \\u0024 character" }' ->> 'a' as not_an_escape; select json '{ "a": "null \u0000 escape" }' ->> 'a' as fails; ERROR: unsupported Unicode escape sequence DETAIL: \u0000 cannot be converted to text. -CONTEXT: JSON data, line 1: { "a":... +CONTEXT: JSON data, line 1: { "a": "null \u0000... select json '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape; not_an_escape -------------------- @@ -159,7 +159,7 @@ ERROR: unsupported Unicode escape sequence LINE 1: SELECT '"\u0000"'::jsonb; ^ DETAIL: \u0000 cannot be converted to text. -CONTEXT: JSON data, line 1: ... +CONTEXT: JSON data, line 1: "\u0000... -- use octet_length here so we don't get an odd unicode char in the -- output SELECT octet_length('"\uaBcD"'::jsonb::text); -- OK, uppercase and lower case both OK @@ -180,25 +180,25 @@ ERROR: invalid input syntax for type json LINE 1: SELECT jsonb '{ "a": "\ud83d\ud83d" }' -> 'a'; ^ DETAIL: Unicode high surrogate must not follow a high surrogate. -CONTEXT: JSON data, line 1: { "a":... +CONTEXT: JSON data, line 1: { "a": "\ud83d\ud83d... 
SELECT jsonb '{ "a": "\ude04\ud83d" }' -> 'a'; -- surrogates in wrong order ERROR: invalid input syntax for type json LINE 1: SELECT jsonb '{ "a": "\ude04\ud83d" }' -> 'a'; ^ DETAIL: Unicode low surrogate must follow a high surrogate. -CONTEXT: JSON data, line 1: { "a":... +CONTEXT: JSON data, line 1: { "a": "\ude04... SELECT jsonb '{ "a": "\ud83dX" }' -> 'a'; -- orphan high surrogate ERROR: invalid input syntax for type json LINE 1: SELECT jsonb '{ "a": "\ud83dX" }' -> 'a'; ^ DETAIL: Unicode low surrogate must follow a high surrogate. -CONTEXT: JSON data, line 1: { "a":... +CONTEXT: JSON data, line 1: { "a": "\ud83dX... SELECT jsonb '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate ERROR: invalid input syntax for type json LINE 1: SELECT jsonb '{ "a": "\ude04X" }' -> 'a'; ^ DETAIL: Unicode low surrogate must follow a high surrogate. -CONTEXT: JSON data, line 1: { "a":... +CONTEXT: JSON data, line 1: { "a": "\ude04... -- handling of simple unicode escapes SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8; correct_in_utf8 @@ -223,7 +223,7 @@ ERROR: unsupported Unicode escape sequence LINE 1: SELECT jsonb '{ "a": "null \u0000 escape" }' as fails; ^ DETAIL: \u0000 cannot be converted to text. -CONTEXT: JSON data, line 1: { "a":... +CONTEXT: JSON data, line 1: { "a": "null \u0000... SELECT jsonb '{ "a": "null \\u0000 escape" }' as not_an_escape; not_an_escape ------------------------------ @@ -253,7 +253,7 @@ ERROR: unsupported Unicode escape sequence LINE 1: SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' as fai... ^ DETAIL: \u0000 cannot be converted to text. -CONTEXT: JSON data, line 1: { "a":... +CONTEXT: JSON data, line 1: { "a": "null \u0000... SELECT jsonb '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape; not_an_escape -------------------- diff --git a/src/test/regress/expected/json_encoding_1.out b/src/test/regress/expected/json_encoding_1.out index e2fc131b0fa..938f8e24aaf 100644 --- a/src/test/regress/expected/json_encoding_1.out +++ b/src/test/regress/expected/json_encoding_1.out @@ -52,19 +52,19 @@ ERROR: conversion between UTF8 and SQL_ASCII is not supported select json '{ "a": "\ud83d\ud83d" }' -> 'a'; -- 2 high surrogates in a row ERROR: invalid input syntax for type json DETAIL: Unicode high surrogate must not follow a high surrogate. -CONTEXT: JSON data, line 1: { "a":... +CONTEXT: JSON data, line 1: { "a": "\ud83d\ud83d... select json '{ "a": "\ude04\ud83d" }' -> 'a'; -- surrogates in wrong order ERROR: invalid input syntax for type json DETAIL: Unicode low surrogate must follow a high surrogate. -CONTEXT: JSON data, line 1: { "a":... +CONTEXT: JSON data, line 1: { "a": "\ude04... select json '{ "a": "\ud83dX" }' -> 'a'; -- orphan high surrogate ERROR: invalid input syntax for type json DETAIL: Unicode low surrogate must follow a high surrogate. -CONTEXT: JSON data, line 1: { "a":... +CONTEXT: JSON data, line 1: { "a": "\ud83dX... select json '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate ERROR: invalid input syntax for type json DETAIL: Unicode low surrogate must follow a high surrogate. -CONTEXT: JSON data, line 1: { "a":... +CONTEXT: JSON data, line 1: { "a": "\ude04... 
--handling of simple unicode escapes select json '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8; correct_in_utf8 @@ -113,7 +113,7 @@ select json '{ "a": "dollar \\u0024 character" }' ->> 'a' as not_an_escape; select json '{ "a": "null \u0000 escape" }' ->> 'a' as fails; ERROR: unsupported Unicode escape sequence DETAIL: \u0000 cannot be converted to text. -CONTEXT: JSON data, line 1: { "a":... +CONTEXT: JSON data, line 1: { "a": "null \u0000... select json '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape; not_an_escape -------------------- @@ -151,7 +151,7 @@ ERROR: unsupported Unicode escape sequence LINE 1: SELECT '"\u0000"'::jsonb; ^ DETAIL: \u0000 cannot be converted to text. -CONTEXT: JSON data, line 1: ... +CONTEXT: JSON data, line 1: "\u0000... -- use octet_length here so we don't get an odd unicode char in the -- output SELECT octet_length('"\uaBcD"'::jsonb::text); -- OK, uppercase and lower case both OK @@ -168,25 +168,25 @@ ERROR: invalid input syntax for type json LINE 1: SELECT jsonb '{ "a": "\ud83d\ud83d" }' -> 'a'; ^ DETAIL: Unicode high surrogate must not follow a high surrogate. -CONTEXT: JSON data, line 1: { "a":... +CONTEXT: JSON data, line 1: { "a": "\ud83d\ud83d... SELECT jsonb '{ "a": "\ude04\ud83d" }' -> 'a'; -- surrogates in wrong order ERROR: invalid input syntax for type json LINE 1: SELECT jsonb '{ "a": "\ude04\ud83d" }' -> 'a'; ^ DETAIL: Unicode low surrogate must follow a high surrogate. -CONTEXT: JSON data, line 1: { "a":... +CONTEXT: JSON data, line 1: { "a": "\ude04... SELECT jsonb '{ "a": "\ud83dX" }' -> 'a'; -- orphan high surrogate ERROR: invalid input syntax for type json LINE 1: SELECT jsonb '{ "a": "\ud83dX" }' -> 'a'; ^ DETAIL: Unicode low surrogate must follow a high surrogate. -CONTEXT: JSON data, line 1: { "a":... +CONTEXT: JSON data, line 1: { "a": "\ud83dX... SELECT jsonb '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate ERROR: invalid input syntax for type json LINE 1: SELECT jsonb '{ "a": "\ude04X" }' -> 'a'; ^ DETAIL: Unicode low surrogate must follow a high surrogate. -CONTEXT: JSON data, line 1: { "a":... +CONTEXT: JSON data, line 1: { "a": "\ude04... -- handling of simple unicode escapes SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8; ERROR: conversion between UTF8 and SQL_ASCII is not supported @@ -209,7 +209,7 @@ ERROR: unsupported Unicode escape sequence LINE 1: SELECT jsonb '{ "a": "null \u0000 escape" }' as fails; ^ DETAIL: \u0000 cannot be converted to text. -CONTEXT: JSON data, line 1: { "a":... +CONTEXT: JSON data, line 1: { "a": "null \u0000... SELECT jsonb '{ "a": "null \\u0000 escape" }' as not_an_escape; not_an_escape ------------------------------ @@ -237,7 +237,7 @@ ERROR: unsupported Unicode escape sequence LINE 1: SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' as fai... ^ DETAIL: \u0000 cannot be converted to text. -CONTEXT: JSON data, line 1: { "a":... +CONTEXT: JSON data, line 1: { "a": "null \u0000... SELECT jsonb '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape; not_an_escape -------------------- From b8ac6460e7d3a5372c01ad4faf1c83214b0cb283 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 14 Mar 2023 11:10:45 -0400 Subject: [PATCH 26/61] Remove unnecessary code in dependency_is_compatible_expression(). Scanning the expression for compatible Vars isn't really necessary, because the subsequent match against StatisticExtInfo entries will eliminate expressions containing other Vars just fine. 
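(For context, the clauses in question come from extended statistics defined
on expressions, e.g. a hypothetical

    CREATE STATISTICS s (dependencies) ON (lower(a)), b FROM t;

where a clause such as lower(a) = 'x' must match the statistics object's
expression list anyway; that later matching step is what makes the separate
Var scan redundant.)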
Moreover, this code hadn't stopped to think about what to do with PlaceHolderVars or Aggrefs in the clause; and at least for the PHV case, that demonstrably leads to failures. Rather than work out whether it's reasonable to ignore those, let's just remove the whole stanza. Per report from Richard Guo. Back-patch to v14 where this code was added. Discussion: https://postgr.es/m/CAMbWs48Mmvm-acGevXuwpB=g5JMqVSL6i9z5UaJyLGJqa-XPAA@mail.gmail.com --- src/backend/statistics/dependencies.c | 28 +++------------------------ 1 file changed, 3 insertions(+), 25 deletions(-) diff --git a/src/backend/statistics/dependencies.c b/src/backend/statistics/dependencies.c index 8b3d560cb20..06d930f9164 100644 --- a/src/backend/statistics/dependencies.c +++ b/src/backend/statistics/dependencies.c @@ -1170,13 +1170,12 @@ clauselist_apply_dependencies(PlannerInfo *root, List *clauses, * Determines if the expression is compatible with functional dependencies * * Similar to dependency_is_compatible_clause, but doesn't enforce that the - * expression is a simple Var. OTOH we check that there's at least one - * statistics object matching the expression. + * expression is a simple Var. On success, return the matching statistics + * expression into *expr. */ static bool dependency_is_compatible_expression(Node *clause, Index relid, List *statlist, Node **expr) { - List *vars; ListCell *lc, *lc2; Node *clause_expr; @@ -1324,29 +1323,8 @@ dependency_is_compatible_expression(Node *clause, Index relid, List *statlist, N if (IsA(clause_expr, RelabelType)) clause_expr = (Node *) ((RelabelType *) clause_expr)->arg; - vars = pull_var_clause(clause_expr, 0); - - foreach(lc, vars) - { - Var *var = (Var *) lfirst(lc); - - /* Ensure Var is from the correct relation */ - if (var->varno != relid) - return false; - - /* We also better ensure the Var is from the current level */ - if (var->varlevelsup != 0) - return false; - - /* Also ignore system attributes (we don't allow stats on those) */ - if (!AttrNumberIsForUserDefinedAttr(var->varattno)) - return false; - } - /* - * Check if we actually have a matching statistics for the expression. - * - * XXX Maybe this is an overkill. We'll eliminate the expressions later. + * Search for a matching statistics expression. */ foreach(lc, statlist) { From 99266f720a97ca76b0b085b921f68294da98c760 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 14 Mar 2023 19:17:31 -0400 Subject: [PATCH 27/61] Fix corner case bug in numeric to_char() some more. The band-aid applied in commit f0bedf3e4 turns out to still need some work: it made sure we didn't set Np->last_relevant too small (to the left of the decimal point), but it didn't prevent setting it too large (off the end of the partially-converted string). This could result in fetching data beyond the end of the allocated space, which with very bad luck could cause a SIGSEGV, though I don't see any hazard of interesting memory disclosure. Per bug #17839 from Thiago Nunes. The bug's pretty ancient, so back-patch to all supported versions. 
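To illustrate with the case added to the regression tests: the format below
asks for far more digits than the converted float8 value can supply, so the
digit string is shorter than the format implies, and last_relevant must be
clamped to the string's actual end:

    SELECT to_char('12345678901'::float8,
                   'FM9999999999D9999900000000000000000');
         to_char
    -----------------
     ##########.####

(The eleven-digit integer part overflows the ten '9' positions, hence the
'#' overflow markers; before the fix this query could read past the end of
the converted string.)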
Discussion: https://postgr.es/m/17839-aada50db24d7b0da@postgresql.org --- src/backend/utils/adt/formatting.c | 11 +++++++++-- src/test/regress/expected/numeric.out | 6 ++++++ src/test/regress/sql/numeric.sql | 1 + 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c index 7d58e28cd1e..a7a426c3034 100644 --- a/src/backend/utils/adt/formatting.c +++ b/src/backend/utils/adt/formatting.c @@ -5719,13 +5719,20 @@ NUM_processor(FormatNode *node, NUMDesc *Num, char *inout, /* * If any '0' specifiers are present, make sure we don't strip - * those digits. + * those digits. But don't advance last_relevant beyond the last + * character of the Np->number string, which is a hazard if the + * number got shortened due to precision limitations. */ if (Np->last_relevant && Np->Num->zero_end > Np->out_pre_spaces) { + int last_zero_pos; char *last_zero; - last_zero = Np->number + (Np->Num->zero_end - Np->out_pre_spaces); + /* note that Np->number cannot be zero-length here */ + last_zero_pos = strlen(Np->number) - 1; + last_zero_pos = Min(last_zero_pos, + Np->Num->zero_end - Np->out_pre_spaces); + last_zero = Np->number + last_zero_pos; if (Np->last_relevant < last_zero) Np->last_relevant = last_zero; } diff --git a/src/test/regress/expected/numeric.out b/src/test/regress/expected/numeric.out index bea33181bee..2f2818e001c 100644 --- a/src/test/regress/expected/numeric.out +++ b/src/test/regress/expected/numeric.out @@ -1929,6 +1929,12 @@ SELECT to_char('100'::numeric, 'FM999'); 100 (1 row) +SELECT to_char('12345678901'::float8, 'FM9999999999D9999900000000000000000'); + to_char +----------------- + ##########.#### +(1 row) + -- Check parsing of literal text in a format string SELECT to_char('100'::numeric, 'foo999'); to_char diff --git a/src/test/regress/sql/numeric.sql b/src/test/regress/sql/numeric.sql index 9233c666d4b..56294da5ae9 100644 --- a/src/test/regress/sql/numeric.sql +++ b/src/test/regress/sql/numeric.sql @@ -979,6 +979,7 @@ FROM v; SELECT to_char('100'::numeric, 'FM999.9'); SELECT to_char('100'::numeric, 'FM999.'); SELECT to_char('100'::numeric, 'FM999'); +SELECT to_char('12345678901'::float8, 'FM9999999999D9999900000000000000000'); -- Check parsing of literal text in a format string SELECT to_char('100'::numeric, 'foo999'); From d657b4137b41f39ea62390ea91136e2049a4808d Mon Sep 17 00:00:00 2001 From: Thomas Munro Date: Wed, 15 Mar 2023 13:17:18 +1300 Subject: [PATCH 28/61] Fix waitpid() emulation on Windows. Our waitpid() emulation didn't prevent a PID from being recycled by the OS before the call to waitpid(). The postmaster could finish up tracking more than one child process with the same PID, and confuse them. Fix, by moving the guts of pgwin32_deadchild_callback() into waitpid(), so that resources are released synchronously. The process and PID continue to exist until we close the process handle, which only happens once we're ready to adjust our book-keeping of running children. This seems to explain a couple of failures on CI. It had never been reported before, despite the code being as old as the Windows port. Perhaps Windows started recycling PIDs more rapidly, or perhaps timing changes due to commit 7389aad6 made it more likely to break. Thanks to Alexander Lakhin for analysis and Andres Freund for tracking down the root cause. Back-patch to all supported branches. 
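In outline, the window was as follows (timing illustrative): (1) a child
exits, and the thread-pool callback posts the raw PID to the completion
queue and closes the process handle; (2) with no handle left open, Windows
may recycle that PID for a new child; (3) the postmaster's waitpid() later
dequeues the stale PID and matches it against the new child's book-keeping
entry.  Because Windows does not recycle a PID while an open handle to the
process remains, deferring CloseHandle() until waitpid() has consumed the
event closes the window.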
Reported-by: Andres Freund Discussion: https://postgr.es/m/20230208012852.bvkn2am4h4iqjogq%40awork3.anarazel.de --- src/backend/postmaster/postmaster.c | 70 ++++++++++++++++------------- 1 file changed, 40 insertions(+), 30 deletions(-) diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index 8f21a90f391..aab9d97da83 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -5455,7 +5455,7 @@ internal_forkexec(int argc, char *argv[], Port *port) (errmsg_internal("could not register process for wait: error code %lu", GetLastError()))); - /* Don't close pi.hProcess here - the wait thread needs access to it */ + /* Don't close pi.hProcess here - waitpid() needs access to it */ CloseHandle(pi.hThread); @@ -7288,36 +7288,21 @@ ShmemBackendArrayRemove(Backend *bn) static pid_t waitpid(pid_t pid, int *exitstatus, int options) { + win32_deadchild_waitinfo *childinfo; + DWORD exitcode; DWORD dwd; ULONG_PTR key; OVERLAPPED *ovl; - /* - * Check if there are any dead children. If there are, return the pid of - * the first one that died. - */ - if (GetQueuedCompletionStatus(win32ChildQueue, &dwd, &key, &ovl, 0)) + /* Try to consume one win32_deadchild_waitinfo from the queue. */ + if (!GetQueuedCompletionStatus(win32ChildQueue, &dwd, &key, &ovl, 0)) { - *exitstatus = (int) key; - return dwd; + errno = EAGAIN; + return -1; } - return -1; -} - -/* - * Note! Code below executes on a thread pool! All operations must - * be thread safe! Note that elog() and friends must *not* be used. - */ -static void WINAPI -pgwin32_deadchild_callback(PVOID lpParameter, BOOLEAN TimerOrWaitFired) -{ - win32_deadchild_waitinfo *childinfo = (win32_deadchild_waitinfo *) lpParameter; - DWORD exitcode; - - if (TimerOrWaitFired) - return; /* timeout. Should never happen, since we use - * INFINITE as timeout value. */ + childinfo = (win32_deadchild_waitinfo *) key; + pid = childinfo->procId; /* * Remove handle from wait - required even though it's set to wait only @@ -7333,13 +7318,11 @@ pgwin32_deadchild_callback(PVOID lpParameter, BOOLEAN TimerOrWaitFired) write_stderr("could not read exit code for process\n"); exitcode = 255; } - - if (!PostQueuedCompletionStatus(win32ChildQueue, childinfo->procId, (ULONG_PTR) exitcode, NULL)) - write_stderr("could not post child completion status\n"); + *exitstatus = exitcode; /* - * Handle is per-process, so we close it here instead of in the - * originating thread + * Close the process handle. Only after this point can the PID can be + * recycled by the kernel. */ CloseHandle(childinfo->procHandle); @@ -7349,7 +7332,34 @@ pgwin32_deadchild_callback(PVOID lpParameter, BOOLEAN TimerOrWaitFired) */ free(childinfo); - /* Queue SIGCHLD signal */ + return pid; +} + +/* + * Note! Code below executes on a thread pool! All operations must + * be thread safe! Note that elog() and friends must *not* be used. + */ +static void WINAPI +pgwin32_deadchild_callback(PVOID lpParameter, BOOLEAN TimerOrWaitFired) +{ + /* Should never happen, since we use INFINITE as timeout value. */ + if (TimerOrWaitFired) + return; + + /* + * Post the win32_deadchild_waitinfo object for waitpid() to deal with. If + * that fails, we leak the object, but we also leak a whole process and + * get into an unrecoverable state, so there's not much point in worrying + * about that. We'd like to panic, but we can't use that infrastructure + * from this thread. 
+ */ + if (!PostQueuedCompletionStatus(win32ChildQueue, + 0, + (ULONG_PTR) lpParameter, + NULL)) + write_stderr("could not post child completion status\n"); + + /* Queue SIGCHLD signal. */ pg_queue_signal(SIGCHLD); } #endif /* WIN32 */ From 9b52c8987250009a4cc9e6f203b6e2d49154214c Mon Sep 17 00:00:00 2001 From: Thomas Munro Date: Wed, 15 Mar 2023 13:57:00 +1300 Subject: [PATCH 29/61] Fix fractional vacuum_cost_delay. Commit 4753ef37 changed vacuum_delay_point() to use the WaitLatch() API, to fix the problem that vacuum could keep running for a very long time after the postmaster died. Unfortunately, that broke commit caf626b2's support for fractional vacuum_cost_delay, which shipped in PostgreSQL 12. WaitLatch() works in whole milliseconds. For now, revert the change from commit 4753ef37, but add an explicit check for postmaster death. That's an extra system call on systems other than Linux and FreeBSD, but that overhead doesn't matter much considering that we willingly went to sleep and woke up again. (In later work, we might add higher resolution timeouts to the latch API so that we could do this with our standard programming pattern, but that wouldn't be back-patched.) Back-patch to 14, where commit 4753ef37 arrived. Reported-by: Melanie Plageman Discussion: https://postgr.es/m/CAAKRu_b-q0hXCBUCAATh0Z4Zi6UkiC0k2DFgoD3nC-r3SkR3tg%40mail.gmail.com --- src/backend/commands/vacuum.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index c113f274b81..35c9a9fdafe 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -52,6 +52,7 @@ #include "postmaster/bgworker_internals.h" #include "storage/bufmgr.h" #include "storage/lmgr.h" +#include "storage/pmsignal.h" #include "storage/proc.h" #include "storage/procarray.h" #include "utils/acl.h" @@ -3005,11 +3006,18 @@ vacuum_delay_point(void) if (msec > VacuumCostDelay * 4) msec = VacuumCostDelay * 4; - (void) WaitLatch(MyLatch, - WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH, - msec, - WAIT_EVENT_VACUUM_DELAY); - ResetLatch(MyLatch); + pgstat_report_wait_start(WAIT_EVENT_VACUUM_DELAY); + pg_usleep(msec * 1000); + pgstat_report_wait_end(); + + /* + * We don't want to ignore postmaster death during very long vacuums + * with vacuum_cost_delay configured. We can't use the usual + * WaitLatch() approach here because we want microsecond-based sleep + * durations above. + */ + if (IsUnderPostmaster && !PostmasterIsAlive()) + exit(1); VacuumCostBalance = 0; From f32a619a6dbab63930e9f350c16ee10d4d256d26 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Wed, 15 Mar 2023 12:56:10 +0900 Subject: [PATCH 30/61] Improve WIN32 port of fstat() to detect more file types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current implementation of _pgfstat64() is ineffective in detecting a terminal handle or an anonymous named pipe. This commit improves our port of fstat() to detect more efficiently such cases by relying on GetFileType(), and returning more correct data when the type found is either a FILE_TYPE_PIPE (_S_IFIFO) or a FILE_TYPE_CHAR (_S_IFCHR). This is part of a more global fix to address failures when feeding the output generated by pg_dump to pg_restore through a pipe, for example, but not all of it. We are also going to need to do something about fseek() and ftello() which are not reliable on WIN32 for the same cases where fstat() was incorrect. 
Fixing fstat() is independent of the rest, though, which is why both fixes are handled separately, and this is the first part of it. Reported-by: Daniel Watzinger Author: Daniel Watzinger, Juan José Santamaría Flecha Discussion: https://postgr.es/m/b1448cd7-871e-20e3-8398-895e2d1d3bf9@gmail.com Backpatch-through: 14 --- src/port/win32stat.c | 69 ++++++++++++++++++++++++++++++-------------- 1 file changed, 48 insertions(+), 21 deletions(-) diff --git a/src/port/win32stat.c b/src/port/win32stat.c index 426e01f0efa..36c3b171f40 100644 --- a/src/port/win32stat.c +++ b/src/port/win32stat.c @@ -289,39 +289,66 @@ int _pgfstat64(int fileno, struct stat *buf) { HANDLE hFile = (HANDLE) _get_osfhandle(fileno); - BY_HANDLE_FILE_INFORMATION fiData; + DWORD fileType = FILE_TYPE_UNKNOWN; + DWORD lastError; + unsigned short st_mode; - if (hFile == INVALID_HANDLE_VALUE || buf == NULL) + /* + * When stdin, stdout, and stderr aren't associated with a stream the + * special value -2 is returned: + * https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/get-osfhandle + */ + if (hFile == INVALID_HANDLE_VALUE || hFile == (HANDLE) -2 || buf == NULL) { errno = EINVAL; return -1; } + fileType = GetFileType(hFile); + lastError = GetLastError(); + /* - * Check if the fileno is a data stream. If so, unless it has been - * redirected to a file, getting information through its HANDLE will fail, - * so emulate its stat information in the most appropriate way and return - * it instead. + * Invoke GetLastError in order to distinguish between a "valid" return of + * FILE_TYPE_UNKNOWN and its return due to a calling error. In case of + * success, GetLastError returns NO_ERROR. */ - if ((fileno == _fileno(stdin) || - fileno == _fileno(stdout) || - fileno == _fileno(stderr)) && - !GetFileInformationByHandle(hFile, &fiData)) + if (fileType == FILE_TYPE_UNKNOWN && lastError != NO_ERROR) { - memset(buf, 0, sizeof(*buf)); - buf->st_mode = _S_IFCHR; - buf->st_dev = fileno; - buf->st_rdev = fileno; - buf->st_nlink = 1; - return 0; + _dosmaperr(lastError); + return -1; } - /* - * Since we already have a file handle there is no need to check for - * ERROR_DELETE_PENDING. - */ + switch (fileType) + { + /* The specified file is a disk file */ + case FILE_TYPE_DISK: + return fileinfo_to_stat(hFile, buf); + + /* + * The specified file is a socket, a named pipe, or an anonymous + * pipe. + */ + case FILE_TYPE_PIPE: + st_mode = _S_IFIFO; + break; + /* The specified file is a character file */ + case FILE_TYPE_CHAR: + st_mode = _S_IFCHR; + break; + /* Unused flag and unknown file type */ + case FILE_TYPE_REMOTE: + case FILE_TYPE_UNKNOWN: + default: + errno = EINVAL; + return -1; + } - return fileinfo_to_stat(hFile, buf); + memset(buf, 0, sizeof(*buf)); + buf->st_mode = st_mode; + buf->st_dev = fileno; + buf->st_rdev = fileno; + buf->st_nlink = 1; + return 0; } #endif /* WIN32 */ From fd03282bbc301767be4a7fb93d8ba1d12bb8da1d Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Thu, 16 Mar 2023 16:50:56 -0400 Subject: [PATCH 31/61] Doc: mention CREATE+ATTACH PARTITION with CREATE TABLE...PARTITION OF. Clarify that ATTACH/DETACH PARTITION can be used to perform partition maintenance with less locking than straight CREATE TABLE/DROP TABLE. This was already stated in some places, but not emphasized. Back-patch to v14 where DETACH PARTITION CONCURRENTLY was added. (We had lower lock levels for ATTACH PARTITION before that, but this wording wouldn't apply.) 
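As a concrete sketch of the recommended lower-lock workflow (reusing the
manual's running "measurement" example; the partition name and bounds below
are illustrative):

    CREATE TABLE measurement_y2008m03
        (LIKE measurement INCLUDING DEFAULTS INCLUDING CONSTRAINTS);
    -- load, check, and transform the new data here, then:
    ALTER TABLE measurement ATTACH PARTITION measurement_y2008m03
        FOR VALUES FROM ('2008-03-01') TO ('2008-04-01');

The ATTACH takes only SHARE UPDATE EXCLUSIVE on "measurement", versus the
ACCESS EXCLUSIVE lock required by CREATE TABLE ... PARTITION OF.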
Justin Pryzby, reviewed by Robert Treat and Jakub Wartak; a little further wordsmithing by me Discussion: https://postgr.es/m/20220718143304.GC18011@telsasoft.com --- doc/src/sgml/ddl.sgml | 13 +++++++------ doc/src/sgml/ref/create_table.sgml | 20 ++++++++++++++++---- 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/doc/src/sgml/ddl.sgml b/doc/src/sgml/ddl.sgml index e49d19716c1..78f4448a5bc 100644 --- a/doc/src/sgml/ddl.sgml +++ b/doc/src/sgml/ddl.sgml @@ -3951,9 +3951,15 @@ CREATE TABLE measurement_y2008m02 PARTITION OF measurement As an alternative, it is sometimes more convenient to create the - new table outside the partition structure, and make it a proper + new table outside the partition structure, and attach it as a partition later. This allows new data to be loaded, checked, and transformed prior to it appearing in the partitioned table. + Moreover, the ATTACH PARTITION operation requires + only SHARE UPDATE EXCLUSIVE lock on the + partitioned table, as opposed to the ACCESS + EXCLUSIVE lock that is required by CREATE TABLE + ... PARTITION OF, so it is more friendly to concurrent + operations on the partitioned table. The CREATE TABLE ... LIKE option is helpful to avoid tediously repeating the parent table's definition: @@ -3973,11 +3979,6 @@ ALTER TABLE measurement ATTACH PARTITION measurement_y2008m02 - - The ATTACH PARTITION command requires taking a - SHARE UPDATE EXCLUSIVE lock on the partitioned table. - - Before running the ATTACH PARTITION command, it is recommended to create a CHECK constraint on the table to diff --git a/doc/src/sgml/ref/create_table.sgml b/doc/src/sgml/ref/create_table.sgml index 4a0b6dcfcd4..3a4920c4c35 100644 --- a/doc/src/sgml/ref/create_table.sgml +++ b/doc/src/sgml/ref/create_table.sgml @@ -661,12 +661,24 @@ Where column_reference_storage_directive is: - Operations such as TRUNCATE which normally affect a table and all of its + Operations such as TRUNCATE + which normally affect a table and all of its inheritance children will cascade to all partitions, but may also be - performed on an individual partition. Note that dropping a partition - with DROP TABLE requires taking an ACCESS - EXCLUSIVE lock on the parent table. + performed on an individual partition. + + + Note that creating a partition using PARTITION OF + requires taking an ACCESS EXCLUSIVE lock on the + parent partitioned table. Likewise, dropping a partition + with DROP TABLE requires taking + an ACCESS EXCLUSIVE lock on the parent table. + It is possible to use ALTER + TABLE ATTACH/DETACH PARTITION to perform these + operations with a weaker lock, thus reducing interference with + concurrent operations on the partitioned table. + + From 8a182bb29bccc4ed98cfad07f6950c05825bdd7e Mon Sep 17 00:00:00 2001 From: Thomas Munro Date: Fri, 17 Mar 2023 09:44:42 +1300 Subject: [PATCH 32/61] Small tidyup for commit d41a178b. A comment was left behind claiming that we needed to use malloc() rather than palloc() because the corresponding free would run in another thread, but that's not true anymore. Remove that comment. And, with the reason being gone, we might as well actually use palloc(). Back-patch to supported releases, like d41a178b. 
Discussion: https://postgr.es/m/CA%2BhUKG%2BpdM9v3Jv4tc2BFx2jh_daY3uzUyAGBhtDkotEQDNPYw%40mail.gmail.com --- src/backend/postmaster/postmaster.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index aab9d97da83..6138fc32cb1 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -5430,13 +5430,10 @@ internal_forkexec(int argc, char *argv[], Port *port) /* * Queue a waiter to signal when this child dies. The wait will be handled - * automatically by an operating system thread pool. - * - * Note: use malloc instead of palloc, since it needs to be thread-safe. - * Struct will be free():d from the callback function that runs on a - * different thread. + * automatically by an operating system thread pool. The memory will be + * freed by a later call to waitpid(). */ - childinfo = malloc(sizeof(win32_deadchild_waitinfo)); + childinfo = palloc(sizeof(win32_deadchild_waitinfo)); if (!childinfo) ereport(FATAL, (errcode(ERRCODE_OUT_OF_MEMORY), @@ -7330,7 +7327,7 @@ waitpid(pid_t pid, int *exitstatus, int options) * Free struct that was allocated before the call to * RegisterWaitForSingleObject() */ - free(childinfo); + pfree(childinfo); return pid; } From 604a6f2825a70ce35e3ba98c1ecb7bbc1191c355 Mon Sep 17 00:00:00 2001 From: Thomas Munro Date: Fri, 17 Mar 2023 14:44:12 +1300 Subject: [PATCH 33/61] Small tidyup for commit d41a178b, part II. Further to commit 6a9229da, checking for NULL is now redundant. An "out of memory" error would have been thrown already by palloc() and treated as FATAL, so we can delete a few more lines. Back-patch to all releases, like those other commits. Reported-by: Tom Lane Discussion: https://postgr.es/m/4040668.1679013388%40sss.pgh.pa.us --- src/backend/postmaster/postmaster.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index 6138fc32cb1..377055f6f84 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -5434,11 +5434,6 @@ internal_forkexec(int argc, char *argv[], Port *port) * freed by a later call to waitpid(). */ childinfo = palloc(sizeof(win32_deadchild_waitinfo)); - if (!childinfo) - ereport(FATAL, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); - childinfo->procHandle = pi.hProcess; childinfo->procId = pi.dwProcessId; From fb5d10013f18bf494766ac3b3b273f06168b1b2e Mon Sep 17 00:00:00 2001 From: Andres Freund Date: Thu, 16 Mar 2023 17:48:47 -0700 Subject: [PATCH 34/61] tests: Minimize syslog activity by slapd Until now the tests using slapd spammed syslog for every connection / query. Use logfile-only to prevent syslog activity. Unfortunately that only takes effect after logging the first message, but that's still much better than the prior situation. 
Discussion: https://postgr.es/m/20230311233708.3yjdbjkly2q4gq2j@awork3.anarazel.de Backpatch: 11- --- src/test/ldap/t/001_auth.pl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/test/ldap/t/001_auth.pl b/src/test/ldap/t/001_auth.pl index 0a310ccb15a..5f2ba8e5967 100644 --- a/src/test/ldap/t/001_auth.pl +++ b/src/test/ldap/t/001_auth.pl @@ -78,6 +78,7 @@ pidfile $slapd_pidfile logfile $slapd_logfile +logfile-only on access to * by * read From b95732b57bd6ce536f256a10e9fcd91d3a445ba7 Mon Sep 17 00:00:00 2001 From: Andres Freund Date: Thu, 16 Mar 2023 23:03:31 -0700 Subject: [PATCH 35/61] tests: Prevent syslog activity by slapd, take 2 Unfortunately it turns out that the logfile-only option added in b9f8d1cbad7 is only available in openldap starting in 2.6. Luckily the option to control the log level (loglevel/-s) have been around for much longer. As it turns out loglevel/-s only control what goes into syslog, not what ends up in the file specified with 'logfile' and stderr. While we currently are specifying 'logfile', nothing ends up in it, as the option only controls debug messages, and we didn't set a debug level. The debug level can only be configured on the commandline and also prevents forking. That'd require larger changes, so this commit doesn't tackle that issue. Specify the syslog level when starting slapd using -s, as that allows to prevent all syslog messages if one uses '0' instead of 'none', while loglevel doesn't prevent the first message. Discussion: https://postgr.es/m/20230311233708.3yjdbjkly2q4gq2j@awork3.anarazel.de Backpatch: 11- --- src/test/ldap/t/001_auth.pl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/test/ldap/t/001_auth.pl b/src/test/ldap/t/001_auth.pl index 5f2ba8e5967..c2f588deeb8 100644 --- a/src/test/ldap/t/001_auth.pl +++ b/src/test/ldap/t/001_auth.pl @@ -78,7 +78,6 @@ pidfile $slapd_pidfile logfile $slapd_logfile -logfile-only on access to * by * read @@ -114,7 +113,8 @@ "-CA", "$slapd_certs/ca.crt", "-CAkey", "$slapd_certs/ca.key", "-CAcreateserial", "-out", "$slapd_certs/server.crt"; -system_or_bail $slapd, '-f', $slapd_conf, '-h', "$ldap_url $ldaps_url"; +# -s0 prevents log messages ending up in syslog +system_or_bail $slapd, '-f', $slapd_conf,'-s0', '-h', "$ldap_url $ldaps_url"; END { From b00a9926ee1aeef2a5368595744bde1b4be11b42 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 17 Mar 2023 13:31:40 -0400 Subject: [PATCH 36/61] Fix pg_dump for hash partitioning on enum columns. Hash partitioning on an enum is problematic because the hash codes are derived from the OIDs assigned to the enum values, which will almost certainly be different after a dump-and-reload than they were before. This means that some rows probably end up in different partitions than before, causing restore to fail because of partition constraint violations. (pg_upgrade dodges this problem by using hacks to force the enum values to keep the same OIDs, but that's not possible nor desirable for pg_dump.) Users can work around that by specifying --load-via-partition-root, but since that's a dump-time not restore-time decision, one might find out the need for it far too late. Instead, teach pg_dump to apply that option automatically when dealing with a partitioned table that has hash-on-enum partitioning. Also deal with a pre-existing issue for --load-via-partition-root mode: in a parallel restore, we try to TRUNCATE target tables just before loading them, in order to enable some backend optimizations. 
This is bad when using --load-via-partition-root because (a) we're likely to suffer deadlocks from restore jobs trying to restore rows into other partitions than they came from, and (b) if we miss getting a deadlock we might still lose data due to a TRUNCATE removing rows from some already-completed restore job. The fix for this is conceptually simple: just don't TRUNCATE if we're dealing with a --load-via-partition-root case. The tricky bit is for pg_restore to identify those cases. In dumps using COPY commands we can inspect each COPY command to see if it targets the nominal target table or some ancestor. However, in dumps using INSERT commands it's pretty impractical to examine the INSERTs in advance. To provide a solution for that going forward, modify pg_dump to mark TABLE DATA items that are using --load-via-partition-root with a comment. (This change also responds to a complaint from Robert Haas that the dump output for --load-via-partition-root is pretty confusing.) pg_restore checks for the special comment as well as checking the COPY command if present. This will fail to identify the combination of --load-via-partition-root and --inserts in pre-existing dump files, but that should be a pretty rare case in the field. If it does happen you will probably get a deadlock failure that you can work around by not using parallel restore, which is the same as before this bug fix. Having done this, there seems no remaining reason for the alarmism in the pg_dump man page about combining --load-via-partition-root with parallel restore, so remove that warning. Patch by me; thanks to Julien Rouhaud for review. Back-patch to v11 where hash partitioning was introduced. Discussion: https://postgr.es/m/1376149.1675268279@sss.pgh.pa.us --- doc/src/sgml/ref/pg_dump.sgml | 10 ---------- src/bin/pg_dump/pg_dump.c | 2 +- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/doc/src/sgml/ref/pg_dump.sgml b/doc/src/sgml/ref/pg_dump.sgml index 956f97e2537..4bf68d3fbd6 100644 --- a/doc/src/sgml/ref/pg_dump.sgml +++ b/doc/src/sgml/ref/pg_dump.sgml @@ -863,16 +863,6 @@ PostgreSQL documentation and the two systems have different definitions of the collation used to sort the partitioning column. - - - It is best not to use parallelism when restoring from an archive made - with this option, because pg_restore will - not know exactly which partition(s) a given archive data item will - load data into. This could result in inefficiency due to lock - conflicts between parallel jobs, or perhaps even restore failures due - to foreign key constraints being set up before all the relevant data - is loaded. - diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 4a8f4937605..8a2095b21a3 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -8298,7 +8298,7 @@ getPartitioningInfo(Archive *fout) tbinfo = findTableByOid(tabrelid); if (tbinfo == NULL) fatal("failed sanity check, table OID %u appearing in pg_partitioned_table not found", - tabrelid); + tabrelid); tbinfo->unsafe_partitions = true; } From 9d1db9bbe0b0a0f4a3397214e1341dd56e55a287 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sat, 18 Mar 2023 16:11:22 -0400 Subject: [PATCH 37/61] Doc: fix documentation example for bytea hex output format. 
Per report from rsindlin Discussion: https://postgr.es/m/167907221210.1803488.5939223864945604536@wrigleys.postgresql.org --- doc/src/sgml/datatype.sgml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml index 0e89b768c5d..692b6fe2c43 100644 --- a/doc/src/sgml/datatype.sgml +++ b/doc/src/sgml/datatype.sgml @@ -1438,7 +1438,12 @@ SELECT b, char_length(b) FROM test2; Example: -SELECT '\xDEADBEEF'; +SET bytea_output = 'hex'; + +SELECT '\xDEADBEEF'::bytea; + bytea +------------ + \xdeadbeef From ac91309afd52217e7349ef93006001aca37fc60a Mon Sep 17 00:00:00 2001 From: David Rowley Date: Mon, 20 Mar 2023 13:30:55 +1300 Subject: [PATCH 38/61] Fix memory leak in Memoize cache key evaluation When probing the Memoize cache to check if the current cache key values exist in the cache, we perform an evaluation of the expressions making up the cache key before probing the hash table for those values. This operation could leak memory as it is possible that the cache key is an expression which requires allocation of memory, as was the case in bug 17844. Here we fix this by correctly switching to the per tuple context before evaluating the cache expressions so that the memory is freed next time the per tuple context is reset. Bug: 17844 Reported-by: Alexey Ermakov Discussion: https://postgr.es/m/17844-d2f6f9e75a622bed@postgresql.org Backpatch-through: 14, where Memoize was introduced --- src/backend/executor/nodeMemoize.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/backend/executor/nodeMemoize.c b/src/backend/executor/nodeMemoize.c index b32f2469135..c078b68740b 100644 --- a/src/backend/executor/nodeMemoize.c +++ b/src/backend/executor/nodeMemoize.c @@ -289,11 +289,18 @@ prepare_probe_slot(MemoizeState *mstate, MemoizeKey *key) if (key == NULL) { + ExprContext *econtext = mstate->ss.ps.ps_ExprContext; + MemoryContext oldcontext; + + oldcontext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory); + /* Set the probeslot's values based on the current parameter values */ for (int i = 0; i < numKeys; i++) pslot->tts_values[i] = ExecEvalExpr(mstate->param_exprs[i], - mstate->ss.ps.ps_ExprContext, + econtext, &pslot->tts_isnull[i]); + + MemoryContextSwitchTo(oldcontext); } else { From bb73725d7d7b5b23f5eea7e74d6ac49e16c4d711 Mon Sep 17 00:00:00 2001 From: Tomas Vondra Date: Mon, 20 Mar 2023 09:51:50 +0100 Subject: [PATCH 39/61] Fix netmask handling in inet_minmax_multi_ops When calculating distance in brin_minmax_multi_distance_inet(), the netmask was applied incorrectly. This results in (seemingly) incorrect ordering of values, triggering an assert. For builds without asserts this is mostly harmless - we may merge other ranges, possibly resulting in slightly less efficient index. But it's still correct and the greedy algorithm doesn't guarantee optimality anyway. Backpatch to 14, where minmax-multi indexes were introduced. Reported by Dmitry Dolgov, investigation and fix by me. 
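A sketch of how the bad mask arose (reconstructed from the patch, not taken
from the report): for an address such as 127.0.0.1/0 the significant-bit
count lena is 0, so for each later byte the old code computed
nbits = lena - i*8 as a negative number and then shifted 0xFF left by
8 - nbits bits.  For the last byte of an IPv4 address that shift count
reaches 32, which is undefined behavior for a 32-bit int; on common
hardware the count wraps, the mask becomes 0xFF, and the byte is left
unmasked, so 127.0.0.1/0 does not compare as a zeroed address.  Clamping
nbits at 0 clears such bytes as intended.  The new regression test
exercises exactly this:

    CREATE TABLE brin_test_inet (a inet);
    CREATE INDEX ON brin_test_inet USING brin (a inet_minmax_multi_ops);
    INSERT INTO brin_test_inet VALUES ('127.0.0.1/0');
    INSERT INTO brin_test_inet VALUES ('0.0.0.0/12');
    DROP TABLE brin_test_inet;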
Reported-by: Dmitry Dolgov Backpatch-through: 14 Discussion: https://postgr.es/m/17774-c6f3e36dd4471e67@postgresql.org --- src/backend/access/brin/brin_minmax_multi.c | 4 ++-- src/test/regress/expected/brin_multi.out | 6 ++++++ src/test/regress/sql/brin_multi.sql | 7 +++++++ 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/src/backend/access/brin/brin_minmax_multi.c b/src/backend/access/brin/brin_minmax_multi.c index b343226a191..b4e50937609 100644 --- a/src/backend/access/brin/brin_minmax_multi.c +++ b/src/backend/access/brin/brin_minmax_multi.c @@ -2364,14 +2364,14 @@ brin_minmax_multi_distance_inet(PG_FUNCTION_ARGS) unsigned char mask; int nbits; - nbits = lena - (i * 8); + nbits = Max(0, lena - (i * 8)); if (nbits < 8) { mask = (0xFF << (8 - nbits)); addra[i] = (addra[i] & mask); } - nbits = lenb - (i * 8); + nbits = Max(0, lenb - (i * 8)); if (nbits < 8) { mask = (0xFF << (8 - nbits)); diff --git a/src/test/regress/expected/brin_multi.out b/src/test/regress/expected/brin_multi.out index 51277fdb887..0720a66b2ae 100644 --- a/src/test/regress/expected/brin_multi.out +++ b/src/test/regress/expected/brin_multi.out @@ -352,6 +352,12 @@ VACUUM brintest_multi; -- force a summarization cycle in brinidx insert into public.brintest_multi (float4col) values (real 'nan'); insert into public.brintest_multi (float8col) values (real 'nan'); UPDATE brintest_multi SET int8col = int8col * int4col; +-- Test handling of inet netmasks with inet_minmax_multi_ops +CREATE TABLE brin_test_inet (a inet); +CREATE INDEX ON brin_test_inet USING brin (a inet_minmax_multi_ops); +INSERT INTO brin_test_inet VALUES ('127.0.0.1/0'); +INSERT INTO brin_test_inet VALUES ('0.0.0.0/12'); +DROP TABLE brin_test_inet; -- Tests for brin_summarize_new_values SELECT brin_summarize_new_values('brintest_multi'); -- error, not an index ERROR: "brintest_multi" is not an index diff --git a/src/test/regress/sql/brin_multi.sql b/src/test/regress/sql/brin_multi.sql index 9deb8d2573d..a46c09951b5 100644 --- a/src/test/regress/sql/brin_multi.sql +++ b/src/test/regress/sql/brin_multi.sql @@ -359,6 +359,13 @@ insert into public.brintest_multi (float8col) values (real 'nan'); UPDATE brintest_multi SET int8col = int8col * int4col; +-- Test handling of inet netmasks with inet_minmax_multi_ops +CREATE TABLE brin_test_inet (a inet); +CREATE INDEX ON brin_test_inet USING brin (a inet_minmax_multi_ops); +INSERT INTO brin_test_inet VALUES ('127.0.0.1/0'); +INSERT INTO brin_test_inet VALUES ('0.0.0.0/12'); +DROP TABLE brin_test_inet; + -- Tests for brin_summarize_new_values SELECT brin_summarize_new_values('brintest_multi'); -- error, not an index SELECT brin_summarize_new_values('tenk1_unique1'); -- error, not a BRIN index From f2b217a9f0886447daf09d98d61f5f42bbf79e31 Mon Sep 17 00:00:00 2001 From: Thomas Munro Date: Tue, 21 Mar 2023 14:29:34 +1300 Subject: [PATCH 40/61] Fix race in parallel hash join batch cleanup, take II. With unlucky timing and parallel_leader_participation=off (not the default), PHJ could attempt to access per-batch shared state just as it was being freed. There was code intended to prevent that by checking for a cleared pointer, but it was racy. Fix, by introducing an extra barrier phase. The new phase PHJ_BUILD_RUNNING means that it's safe to access the per-batch state to find a batch to help with, and PHJ_BUILD_DONE means that it is too late. The last to detach will free the array of per-batch state as before, but now it will also atomically advance the phase, so that late attachers can avoid the hazard. 
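Spelled out, the build barrier now advances through these phases (names per
hashjoin.h; PHJ_BUILD_ELECTING is the pre-existing phase 0):

    PHJ_BUILD_ELECTING -> PHJ_BUILD_ALLOCATING
        -> PHJ_BUILD_HASHING_INNER -> PHJ_BUILD_HASHING_OUTER
        -> PHJ_BUILD_RUNNING   (probing; per-batch state may be accessed)
        -> PHJ_BUILD_DONE      (last detacher has freed it; too late to join)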
This mirrors the way per-batch hash tables are freed (see phases PHJ_BATCH_PROBING and PHJ_BATCH_DONE). An earlier attempt to fix this (commit 3b8981b6, later reverted) missed one special case. When the inner side is empty (the "empty inner optimization), the build barrier would only make it to PHJ_BUILD_HASHING_INNER phase before workers attempted to detach from the hashtable. In that case, fast-forward the build barrier to PHJ_BUILD_RUNNING before proceeding, so that our later assertions hold and we can still negotiate who is cleaning up. Revealed by build farm failures, where BarrierAttach() failed a sanity check assertion, because the memory had been clobbered by dsa_free(). In non-assert builds, the result could be a segmentation fault. Back-patch to all supported releases. Author: Thomas Munro Author: Melanie Plageman Reported-by: Michael Paquier Reported-by: David Geier Tested-by: David Geier Discussion: https://postgr.es/m/20200929061142.GA29096%40paquier.xyz --- src/backend/executor/nodeHash.c | 50 +++++++++++++++++--------- src/backend/executor/nodeHashjoin.c | 54 ++++++++++++++++++++--------- src/include/executor/hashjoin.h | 3 +- 3 files changed, 74 insertions(+), 33 deletions(-) diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c index 8779d93b06f..a236f5b4819 100644 --- a/src/backend/executor/nodeHash.c +++ b/src/backend/executor/nodeHash.c @@ -436,14 +436,21 @@ MultiExecParallelHash(HashState *node) hashtable->nbuckets = pstate->nbuckets; hashtable->log2_nbuckets = my_log2(hashtable->nbuckets); hashtable->totalTuples = pstate->total_tuples; - ExecParallelHashEnsureBatchAccessors(hashtable); + + /* + * Unless we're completely done and the batch state has been freed, make + * sure we have accessors. + */ + if (BarrierPhase(build_barrier) < PHJ_BUILD_DONE) + ExecParallelHashEnsureBatchAccessors(hashtable); /* * The next synchronization point is in ExecHashJoin's HJ_BUILD_HASHTABLE - * case, which will bring the build phase to PHJ_BUILD_DONE (if it isn't - * there already). + * case, which will bring the build phase to PHJ_BUILD_RUNNING (if it + * isn't there already). */ Assert(BarrierPhase(build_barrier) == PHJ_BUILD_HASHING_OUTER || + BarrierPhase(build_barrier) == PHJ_BUILD_RUNNING || BarrierPhase(build_barrier) == PHJ_BUILD_DONE); } @@ -752,7 +759,7 @@ ExecHashTableCreate(HashState *state, HashJoinState *hjstate, /* * The next Parallel Hash synchronization point is in * MultiExecParallelHash(), which will progress it all the way to - * PHJ_BUILD_DONE. The caller must not return control from this + * PHJ_BUILD_RUNNING. The caller must not return control from this * executor node between now and then. */ } @@ -3763,14 +3770,11 @@ ExecParallelHashEnsureBatchAccessors(HashJoinTable hashtable) } /* - * It's possible for a backend to start up very late so that the whole - * join is finished and the shm state for tracking batches has already - * been freed by ExecHashTableDetach(). In that case we'll just leave - * hashtable->batches as NULL so that ExecParallelHashJoinNewBatch() gives - * up early. + * We should never see a state where the batch-tracking array is freed, + * because we should have given up sooner if we join when the build + * barrier has reached the PHJ_BUILD_DONE phase. */ - if (!DsaPointerIsValid(pstate->batches)) - return; + Assert(DsaPointerIsValid(pstate->batches)); /* Use hash join memory context. 
*/ oldcxt = MemoryContextSwitchTo(hashtable->hashCxt); @@ -3895,9 +3899,18 @@ ExecHashTableDetachBatch(HashJoinTable hashtable) void ExecHashTableDetach(HashJoinTable hashtable) { - if (hashtable->parallel_state) + ParallelHashJoinState *pstate = hashtable->parallel_state; + + /* + * If we're involved in a parallel query, we must either have gotten all + * the way to PHJ_BUILD_RUNNING, or joined too late and be in + * PHJ_BUILD_DONE. + */ + Assert(!pstate || + BarrierPhase(&pstate->build_barrier) >= PHJ_BUILD_RUNNING); + + if (pstate && BarrierPhase(&pstate->build_barrier) == PHJ_BUILD_RUNNING) { - ParallelHashJoinState *pstate = hashtable->parallel_state; int i; /* Make sure any temporary files are closed. */ @@ -3913,17 +3926,22 @@ ExecHashTableDetach(HashJoinTable hashtable) } /* If we're last to detach, clean up shared memory. */ - if (BarrierDetach(&pstate->build_barrier)) + if (BarrierArriveAndDetach(&pstate->build_barrier)) { + /* + * Late joining processes will see this state and give up + * immediately. + */ + Assert(BarrierPhase(&pstate->build_barrier) == PHJ_BUILD_DONE); + if (DsaPointerIsValid(pstate->batches)) { dsa_free(hashtable->area, pstate->batches); pstate->batches = InvalidDsaPointer; } } - - hashtable->parallel_state = NULL; } + hashtable->parallel_state = NULL; } /* diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c index 9ec70f16e31..88eaaa10cef 100644 --- a/src/backend/executor/nodeHashjoin.c +++ b/src/backend/executor/nodeHashjoin.c @@ -47,7 +47,8 @@ * PHJ_BUILD_ALLOCATING -- one sets up the batches and table 0 * PHJ_BUILD_HASHING_INNER -- all hash the inner rel * PHJ_BUILD_HASHING_OUTER -- (multi-batch only) all hash the outer - * PHJ_BUILD_DONE -- building done, probing can begin + * PHJ_BUILD_RUNNING -- building done, probing can begin + * PHJ_BUILD_DONE -- all work complete, one frees batches * * While in the phase PHJ_BUILD_HASHING_INNER a separate pair of barriers may * be used repeatedly as required to coordinate expansions in the number of @@ -75,7 +76,7 @@ * batches whenever it encounters them while scanning and probing, which it * can do because it processes batches in serial order. * - * Once PHJ_BUILD_DONE is reached, backends then split up and process + * Once PHJ_BUILD_RUNNING is reached, backends then split up and process * different batches, or gang up and work together on probing batches if there * aren't enough to go around. For each batch there is a separate barrier * with the following phases: @@ -97,11 +98,16 @@ * * To avoid deadlocks, we never wait for any barrier unless it is known that * all other backends attached to it are actively executing the node or have - * already arrived. Practically, that means that we never return a tuple - * while attached to a barrier, unless the barrier has reached its final - * state. In the slightly special case of the per-batch barrier, we return - * tuples while in PHJ_BATCH_PROBING phase, but that's OK because we use - * BarrierArriveAndDetach() to advance it to PHJ_BATCH_DONE without waiting. + * finished. Practically, that means that we never emit a tuple while attached + * to a barrier, unless the barrier has reached a phase that means that no + * process will wait on it again. We emit tuples while attached to the build + * barrier in phase PHJ_BUILD_RUNNING, and to a per-batch barrier in phase + * PHJ_BATCH_PROBING. These are advanced to PHJ_BUILD_DONE and PHJ_BATCH_DONE + * respectively without waiting, using BarrierArriveAndDetach(). 
The last to + * detach receives a different return value so that it knows that it's safe to + * clean up. Any straggler process that attaches after that phase is reached + * will see that it's too late to participate or access the relevant shared + * memory objects. * *------------------------------------------------------------------------- */ @@ -387,7 +393,21 @@ ExecHashJoinImpl(PlanState *pstate, bool parallel) * outer relation. */ if (hashtable->totalTuples == 0 && !HJ_FILL_OUTER(node)) + { + if (parallel) + { + /* + * Advance the build barrier to PHJ_BUILD_RUNNING + * before proceeding so we can negotiate resource + * cleanup. + */ + Barrier *build_barrier = ¶llel_state->build_barrier; + + while (BarrierPhase(build_barrier) < PHJ_BUILD_RUNNING) + BarrierArriveAndWait(build_barrier, 0); + } return NULL; + } /* * Prefetch JoinQual or NonJoinQual to prevent motion hazard. @@ -433,6 +453,7 @@ ExecHashJoinImpl(PlanState *pstate, bool parallel) build_barrier = ¶llel_state->build_barrier; Assert(BarrierPhase(build_barrier) == PHJ_BUILD_HASHING_OUTER || + BarrierPhase(build_barrier) == PHJ_BUILD_RUNNING || BarrierPhase(build_barrier) == PHJ_BUILD_DONE); if (BarrierPhase(build_barrier) == PHJ_BUILD_HASHING_OUTER) { @@ -453,9 +474,18 @@ ExecHashJoinImpl(PlanState *pstate, bool parallel) BarrierArriveAndWait(build_barrier, WAIT_EVENT_HASH_BUILD_HASH_OUTER); } - Assert(BarrierPhase(build_barrier) == PHJ_BUILD_DONE); + else if (BarrierPhase(build_barrier) == PHJ_BUILD_DONE) + { + /* + * If we attached so late that the job is finished and + * the batch state has been freed, we can return + * immediately. + */ + return NULL; + } /* Each backend should now select a batch to work on. */ + Assert(BarrierPhase(build_barrier) == PHJ_BUILD_RUNNING); hashtable->curbatch = -1; node->hj_JoinState = HJ_NEED_NEW_BATCH; @@ -1428,14 +1458,6 @@ ExecParallelHashJoinNewBatch(HashJoinState *hjstate) Barrier *batch0_barrier = NULL; ParallelHashJoinState *pstate = hashtable->parallel_state; - /* - * If we started up so late that the batch tracking array has been freed - * already by ExecHashTableDetach(), then we are finished. See also - * ExecParallelHashEnsureBatchAccessors(). - */ - if (hashtable->batches == NULL) - return false; - /* * If we were already attached to a batch, remember not to bother checking * it again, and detach from it (possibly freeing the hash table if we are diff --git a/src/include/executor/hashjoin.h b/src/include/executor/hashjoin.h index b1fbaacf5e9..e324e67d914 100644 --- a/src/include/executor/hashjoin.h +++ b/src/include/executor/hashjoin.h @@ -298,7 +298,8 @@ typedef struct ParallelHashJoinState #define PHJ_BUILD_ALLOCATING 1 #define PHJ_BUILD_HASHING_INNER 2 #define PHJ_BUILD_HASHING_OUTER 3 -#define PHJ_BUILD_DONE 4 +#define PHJ_BUILD_RUNNING 4 +#define PHJ_BUILD_DONE 5 /* The phases for probing each batch, used by for batch_barrier. */ #define PHJ_BATCH_ELECTING 0 From a58f565defe34c3eb92d0b79af257cd72ec896a6 Mon Sep 17 00:00:00 2001 From: Amit Kapila Date: Tue, 21 Mar 2023 09:18:51 +0530 Subject: [PATCH 41/61] Ignore dropped columns during apply of update/delete. We fail to apply updates and deletes when the REPLICA IDENTITY FULL is used for the table having dropped columns. We didn't use to ignore dropped columns while doing tuple comparison among the tuples from the publisher and subscriber during apply of updates and deletes. 
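Concretely, the fix is to fetch the attribute metadata before doing any
per-column work, so that dropped columns can be skipped up front;
condensed from the tuples_equal() hunk below (a sketch, not the full
loop):

    for (attrnum = 0; attrnum < slot1->tts_tupleDescriptor->natts; attrnum++)
    {
        Form_pg_attribute att = TupleDescAttr(slot1->tts_tupleDescriptor,
                                              attrnum);

        /* Ignore dropped columns as the publisher doesn't send those. */
        if (att->attisdropped)
            continue;

        /* ... NULL handling and the datum comparison proceed as before ... */
    }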
Author: Onder Kalaci, Shi yu Reviewed-by: Amit Kapila Discussion: https://postgr.es/m/CACawEhVQC9WoofunvXg12aXtbqKnEgWxoRx3+v8q32AWYsdpGg@mail.gmail.com --- src/backend/executor/execReplication.c | 10 ++++- src/test/subscription/t/100_bugs.pl | 56 +++++++++++++++++++++++++- 2 files changed, 63 insertions(+), 3 deletions(-) diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c index 8202e050ec8..093c0228c48 100644 --- a/src/backend/executor/execReplication.c +++ b/src/backend/executor/execReplication.c @@ -243,6 +243,14 @@ tuples_equal(TupleTableSlot *slot1, TupleTableSlot *slot2, Form_pg_attribute att; TypeCacheEntry *typentry; + att = TupleDescAttr(slot1->tts_tupleDescriptor, attrnum); + + /* + * Ignore dropped columns as the publisher doesn't send those + */ + if (att->attisdropped) + continue; + /* * If one value is NULL and other is not, then they are certainly not * equal @@ -256,8 +264,6 @@ tuples_equal(TupleTableSlot *slot1, TupleTableSlot *slot2, if (slot1->tts_isnull[attrnum] || slot2->tts_isnull[attrnum]) continue; - att = TupleDescAttr(slot1->tts_tupleDescriptor, attrnum); - typentry = eq[attrnum]; if (typentry == NULL) { diff --git a/src/test/subscription/t/100_bugs.pl b/src/test/subscription/t/100_bugs.pl index 91602c43399..b9da72eaf2a 100644 --- a/src/test/subscription/t/100_bugs.pl +++ b/src/test/subscription/t/100_bugs.pl @@ -6,7 +6,7 @@ use warnings; use PostgresNode; use TestLib; -use Test::More tests => 7; +use Test::More tests => 8; # Bug #15114 @@ -298,3 +298,57 @@ $node_publisher->stop('fast'); $node_subscriber->stop('fast'); + +# The bug was that when the REPLICA IDENTITY FULL is used with dropped columns, +# we fail to apply updates and deletes +my $node_publisher_d_cols = get_new_node('node_publisher_d_cols'); +$node_publisher_d_cols->init(allows_streaming => 'logical'); +$node_publisher_d_cols->start; + +my $node_subscriber_d_cols = get_new_node('node_subscriber_d_cols'); +$node_subscriber_d_cols->init(allows_streaming => 'logical'); +$node_subscriber_d_cols->start; + +$node_publisher_d_cols->safe_psql( + 'postgres', qq( + CREATE TABLE dropped_cols (a int, b_drop int, c int); + ALTER TABLE dropped_cols REPLICA IDENTITY FULL; + CREATE PUBLICATION pub_dropped_cols FOR TABLE dropped_cols; + -- some initial data + INSERT INTO dropped_cols VALUES (1, 1, 1); +)); + +$node_subscriber_d_cols->safe_psql( + 'postgres', qq( + CREATE TABLE dropped_cols (a int, b_drop int, c int); +)); + +my $publisher_connstr_d_cols = + $node_publisher_d_cols->connstr . 
' dbname=postgres'; +$node_subscriber_d_cols->safe_psql('postgres', + "CREATE SUBSCRIPTION sub_dropped_cols CONNECTION '$publisher_connstr_d_cols' PUBLICATION pub_dropped_cols" +); +$node_subscriber_d_cols->wait_for_subscription_sync; + +$node_publisher_d_cols->safe_psql( + 'postgres', qq( + ALTER TABLE dropped_cols DROP COLUMN b_drop; +)); +$node_subscriber_d_cols->safe_psql( + 'postgres', qq( + ALTER TABLE dropped_cols DROP COLUMN b_drop; +)); + +$node_publisher_d_cols->safe_psql( + 'postgres', qq( + UPDATE dropped_cols SET a = 100; +)); +$node_publisher_d_cols->wait_for_catchup('sub_dropped_cols'); + +is( $node_subscriber_d_cols->safe_psql( + 'postgres', "SELECT count(*) FROM dropped_cols WHERE a = 100"), + qq(1), + 'replication with RI FULL and dropped columns'); + +$node_publisher_d_cols->stop('fast'); +$node_subscriber_d_cols->stop('fast'); From 01fbce035911f49015b793978aba63d811e4a943 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Wed, 22 Mar 2023 18:32:04 +0900 Subject: [PATCH 42/61] doc: Add description of some missing monitoring functions This commit adds some documentation about two monitoring functions: - pg_stat_get_xact_blocks_fetched() - pg_stat_get_xact_blocks_hit() The description of these functions has been removed in ddfc2d9, later simplified by 5f2b089, assuming that all the functions whose descriptions were removed are used in system views. Unfortunately, some of them were are not used in any system views, so they lacked documentation. This gap exists in the docs for a long time, so backpatch all the way down. Reported-by: Michael Paquier Author: Bertrand Drouvot Reviewed-by: Kyotaro Horiguchi Discussion: https://postgr.es/m/ZBeeH5UoNkTPrwHO@paquier.xyz Backpatch-through: 11 --- doc/src/sgml/monitoring.sgml | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index ce6c34d6ea1..3c56f9fa4f1 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -5051,6 +5051,34 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i + + + + pg_stat_get_xact_blocks_fetched + + pg_stat_get_xact_blocks_fetched ( oid ) + bigint + + + Returns the number of buffers fetched for table or index, in the current + transaction. + + + + + + + pg_stat_get_xact_blocks_hit + + pg_stat_get_xact_blocks_hit ( oid ) + bigint + + + Returns the number of buffer hits for table or index, in the current + transaction. + + + From 8bdea85cdb272d89d5eb8493fceecff3ce5220a0 Mon Sep 17 00:00:00 2001 From: Amit Kapila Date: Thu, 23 Mar 2023 11:32:22 +0530 Subject: [PATCH 43/61] Ignore generated columns during apply of update/delete. We fail to apply updates and deletes when the REPLICA IDENTITY FULL is used for the table having generated columns. We didn't use to ignore generated columns while doing tuple comparison among the tuples from the publisher and subscriber during apply of updates and deletes. 
Author: Onder Kalaci Reviewed-by: Shi yu, Amit Kapila Backpatch-through: 12 Discussion: https://postgr.es/m/CACawEhVQC9WoofunvXg12aXtbqKnEgWxoRx3+v8q32AWYsdpGg@mail.gmail.com --- src/backend/executor/execReplication.c | 5 +++-- src/test/subscription/t/100_bugs.pl | 18 ++++++++++++++---- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c index 093c0228c48..831699ff9f5 100644 --- a/src/backend/executor/execReplication.c +++ b/src/backend/executor/execReplication.c @@ -246,9 +246,10 @@ tuples_equal(TupleTableSlot *slot1, TupleTableSlot *slot2, att = TupleDescAttr(slot1->tts_tupleDescriptor, attrnum); /* - * Ignore dropped columns as the publisher doesn't send those + * Ignore dropped and generated columns as the publisher doesn't send + * those */ - if (att->attisdropped) + if (att->attisdropped || att->attgenerated) continue; /* diff --git a/src/test/subscription/t/100_bugs.pl b/src/test/subscription/t/100_bugs.pl index b9da72eaf2a..cce91891ab9 100644 --- a/src/test/subscription/t/100_bugs.pl +++ b/src/test/subscription/t/100_bugs.pl @@ -6,7 +6,7 @@ use warnings; use PostgresNode; use TestLib; -use Test::More tests => 8; +use Test::More tests => 9; # Bug #15114 @@ -299,8 +299,8 @@ $node_publisher->stop('fast'); $node_subscriber->stop('fast'); -# The bug was that when the REPLICA IDENTITY FULL is used with dropped columns, -# we fail to apply updates and deletes +# The bug was that when the REPLICA IDENTITY FULL is used with dropped or +# generated columns, we fail to apply updates and deletes my $node_publisher_d_cols = get_new_node('node_publisher_d_cols'); $node_publisher_d_cols->init(allows_streaming => 'logical'); $node_publisher_d_cols->start; @@ -313,14 +313,18 @@ 'postgres', qq( CREATE TABLE dropped_cols (a int, b_drop int, c int); ALTER TABLE dropped_cols REPLICA IDENTITY FULL; - CREATE PUBLICATION pub_dropped_cols FOR TABLE dropped_cols; + CREATE TABLE generated_cols (a int, b_gen int GENERATED ALWAYS AS (5 * a) STORED, c int); + ALTER TABLE generated_cols REPLICA IDENTITY FULL; + CREATE PUBLICATION pub_dropped_cols FOR TABLE dropped_cols, generated_cols; -- some initial data INSERT INTO dropped_cols VALUES (1, 1, 1); + INSERT INTO generated_cols (a, c) VALUES (1, 1); )); $node_subscriber_d_cols->safe_psql( 'postgres', qq( CREATE TABLE dropped_cols (a int, b_drop int, c int); + CREATE TABLE generated_cols (a int, b_gen int GENERATED ALWAYS AS (5 * a) STORED, c int); )); my $publisher_connstr_d_cols = @@ -342,6 +346,7 @@ $node_publisher_d_cols->safe_psql( 'postgres', qq( UPDATE dropped_cols SET a = 100; + UPDATE generated_cols SET a = 100; )); $node_publisher_d_cols->wait_for_catchup('sub_dropped_cols'); @@ -350,5 +355,10 @@ qq(1), 'replication with RI FULL and dropped columns'); +is( $node_subscriber_d_cols->safe_psql( + 'postgres', "SELECT count(*) FROM generated_cols WHERE a = 100"), + qq(1), + 'replication with RI FULL and generated columns'); + $node_publisher_d_cols->stop('fast'); $node_subscriber_d_cols->stop('fast'); From 9292a84347adbd64da727e456bfa1e27e45da3a4 Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Thu, 23 Mar 2023 15:29:28 -0400 Subject: [PATCH 44/61] amcheck: Fix verify_heapam for tuples where xmin or xmax is 0. In such cases, get_xid_status() doesn't set its output parameter (the third argument), so we shouldn't fall through to code which will test the value of that parameter. 
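The hazard, in miniature (a sketch of the calling pattern only; the out
parameter is of type XidCommitStatus in verify_heapam.c, and the real
switch has more arms):

    XidCommitStatus xmax_status;        /* left unset on XID_INVALID */

    switch (get_xid_status(xmax, ctx, &xmax_status))
    {
        case XID_INVALID:
            /* must return here: falling through reads garbage below */
        case XID_BOUNDS_OK:
            break;
        /* ... the remaining arms report corruption and return ... */
    }

    /* code below here inspects xmax_status, so XID_INVALID must not reach it */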
There are five existing calls to get_xid_status(), three of which seem to already handle this case properly. This commit tries to fix the other two. If we're checking xmin and find that it is invalid (i.e. 0) just report that as corruption, similar to what's already done in the three cases that seem correct. If we're checking xmax and find that's invalid, that's fine: it just means that the tuple hasn't been updated or deleted. Thanks to Andres Freund and valgrind for finding this problem, and also to Andres for having a look at the patch. This bug seems to go all the way back to where verify_heapam was first introduced, but wasn't detected until recently, possibly because of the new test cases added for update chain verification. Back-patch to v14, where this code showed up. Discussion: http://postgr.es/m/CA+TgmoZAYzQZqyUparXy_ks3OEOfLD9-bEXt8N-2tS1qghX9gQ@mail.gmail.com --- contrib/amcheck/verify_heapam.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/contrib/amcheck/verify_heapam.c b/contrib/amcheck/verify_heapam.c index f7964b78173..cc49ccb26f6 100644 --- a/contrib/amcheck/verify_heapam.c +++ b/contrib/amcheck/verify_heapam.c @@ -775,6 +775,9 @@ check_tuple_visibility(HeapCheckContext *ctx) switch (get_xid_status(xmin, ctx, &xmin_status)) { case XID_INVALID: + report_corruption(ctx, + pstrdup("xmin is invalid")); + return false; case XID_BOUNDS_OK: break; case XID_IN_FUTURE: @@ -1110,6 +1113,9 @@ check_tuple_visibility(HeapCheckContext *ctx) xmax = HeapTupleHeaderGetRawXmax(tuphdr); switch (get_xid_status(xmax, ctx, &xmax_status)) { + case XID_INVALID: + ctx->tuple_could_be_pruned = false; + return true; case XID_IN_FUTURE: report_corruption(ctx, psprintf("xmax %u equals or exceeds next valid transaction ID %u:%u", @@ -1132,7 +1138,6 @@ check_tuple_visibility(HeapCheckContext *ctx) XidFromFullTransactionId(ctx->oldest_fxid))); return false; /* corrupt */ case XID_BOUNDS_OK: - case XID_INVALID: break; } From 9d63ac48f3e5a872d778215f687b8b67c30385aa Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sun, 26 Mar 2023 13:41:06 -0400 Subject: [PATCH 45/61] Fix oversights in array manipulation. The nested-arrays code path in ExecEvalArrayExpr() used palloc to allocate the result array, whereas every other array-creating function has used palloc0 since 18c0b4ecc. This mostly works, but unused bits past the end of the nulls bitmap may end up undefined. That causes valgrind complaints with -DWRITE_READ_PARSE_PLAN_TREES, and could cause planner misbehavior as cited in 18c0b4ecc. There seems no very good reason why we should strive to avoid palloc0 in just this one case, so fix it the easy way with s/palloc/palloc0/. While looking at that I noted that we also failed to check for overflow of "nbytes" and "nitems" while summing the sizes of the sub-arrays, potentially allowing a crash due to undersized output allocation. For "nbytes", follow the policy used by other array-munging code of checking for overflow after each addition. (As elsewhere, the last addition of the array's overhead space doesn't need an extra check, since palloc itself will catch a value between 1Gb and 2Gb.) For "nitems", there's no very good reason to sum the inputs at all, since we can perfectly well use ArrayGetNItems' result instead of ignoring it. Per discussion of this bug, also remove redundant zeroing of the nulls bitmap in array_set_element and array_set_slice. Patch by Alexander Lakhin and myself, per bug #17858 from Alexander Lakhin; thanks also to Richard Guo. 
These bugs are a dozen years old, so back-patch to all supported branches. Discussion: https://postgr.es/m/17858-8fd287fd3663d051@postgresql.org --- src/backend/executor/execExprInterp.c | 13 +++++++++---- src/backend/utils/adt/arrayfuncs.c | 6 ++---- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c index 7729117381f..56874a3f78a 100644 --- a/src/backend/executor/execExprInterp.c +++ b/src/backend/executor/execExprInterp.c @@ -2817,7 +2817,7 @@ ExecEvalArrayExpr(ExprState *state, ExprEvalStep *op) { /* Must be nested array expressions */ int nbytes = 0; - int nitems = 0; + int nitems; int outer_nelems = 0; int elem_ndims = 0; int *elem_dims = NULL; @@ -2912,9 +2912,14 @@ ExecEvalArrayExpr(ExprState *state, ExprEvalStep *op) subbitmaps[outer_nelems] = ARR_NULLBITMAP(array); subbytes[outer_nelems] = ARR_SIZE(array) - ARR_DATA_OFFSET(array); nbytes += subbytes[outer_nelems]; + /* check for overflow of total request */ + if (!AllocSizeIsValid(nbytes)) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("array size exceeds the maximum allowed (%d)", + (int) MaxAllocSize))); subnitems[outer_nelems] = ArrayGetNItems(this_ndims, ARR_DIMS(array)); - nitems += subnitems[outer_nelems]; havenulls |= ARR_HASNULL(array); outer_nelems++; } @@ -2948,7 +2953,7 @@ ExecEvalArrayExpr(ExprState *state, ExprEvalStep *op) } /* check for subscript overflow */ - (void) ArrayGetNItems(ndims, dims); + nitems = ArrayGetNItems(ndims, dims); ArrayCheckBounds(ndims, dims, lbs); if (havenulls) @@ -2962,7 +2967,7 @@ ExecEvalArrayExpr(ExprState *state, ExprEvalStep *op) nbytes += ARR_OVERHEAD_NONULLS(ndims); } - result = (ArrayType *) palloc(nbytes); + result = (ArrayType *) palloc0(nbytes); SET_VARSIZE(result, nbytes); result->ndim = ndims; result->dataoffset = dataoffset; diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c index 6d00be72b16..cf6ab1d8408 100644 --- a/src/backend/utils/adt/arrayfuncs.c +++ b/src/backend/utils/adt/arrayfuncs.c @@ -2478,8 +2478,7 @@ array_set_element(Datum arraydatum, { bits8 *newnullbitmap = ARR_NULLBITMAP(newarray); - /* Zero the bitmap to take care of marking inserted positions null */ - MemSet(newnullbitmap, 0, (newnitems + 7) / 8); + /* palloc0 above already marked any inserted positions as nulls */ /* Fix the inserted value */ if (addedafter) array_set_isnull(newnullbitmap, newnitems - 1, isNull); @@ -3127,8 +3126,7 @@ array_set_slice(Datum arraydatum, bits8 *newnullbitmap = ARR_NULLBITMAP(newarray); bits8 *oldnullbitmap = ARR_NULLBITMAP(array); - /* Zero the bitmap to handle marking inserted positions null */ - MemSet(newnullbitmap, 0, (nitems + 7) / 8); + /* palloc0 above already marked any inserted positions as nulls */ array_bitmap_copy(newnullbitmap, addedbefore, oldnullbitmap, 0, itemsbefore); From 19f54ea2a3a2885235d2220f1c98ad15d876c5cd Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Mon, 27 Mar 2023 15:04:02 -0400 Subject: [PATCH 46/61] Reject attempts to alter composite types used in indexes. find_composite_type_dependencies() ignored indexes, which is a poor decision because an expression index could have a stored column of a composite (or other container) type even when the underlying table does not. Teach it to detect such cases and error out. We have to work a bit harder than for other relations because the pg_depend entry won't identify the specific index column of concern, but it's not much new code. 
This does not address bug #17872's original complaint that dropping a column in such a type might lead to violations of the uniqueness property that a unique index is supposed to ensure. That seems of much less concern to me because it won't lead to crashes. Per bug #17872 from Alexander Lakhin. Back-patch to all supported branches. Discussion: https://postgr.es/m/17872-d0fbb799dc3fd85d@postgresql.org --- src/backend/commands/tablecmds.c | 57 +++++++++++++++++++---- src/test/regress/expected/alter_table.out | 10 +++- src/test/regress/sql/alter_table.sql | 11 ++++- 3 files changed, 68 insertions(+), 10 deletions(-) diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 29a7a57e681..9bdebc6e29b 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -8100,6 +8100,7 @@ find_composite_type_dependencies(Oid typeOid, Relation origRelation, { Form_pg_depend pg_depend = (Form_pg_depend) GETSTRUCT(depTup); Relation rel; + TupleDesc tupleDesc; Form_pg_attribute att; /* Check for directly dependent types */ @@ -8116,18 +8117,58 @@ find_composite_type_dependencies(Oid typeOid, Relation origRelation, continue; } - /* Else, ignore dependees that aren't user columns of relations */ - /* (we assume system columns are never of interesting types) */ - if (pg_depend->classid != RelationRelationId || - pg_depend->objsubid <= 0) + /* Else, ignore dependees that aren't relations */ + if (pg_depend->classid != RelationRelationId) continue; rel = relation_open(pg_depend->objid, AccessShareLock); - att = TupleDescAttr(rel->rd_att, pg_depend->objsubid - 1); + tupleDesc = RelationGetDescr(rel); - if (rel->rd_rel->relkind == RELKIND_RELATION || - rel->rd_rel->relkind == RELKIND_MATVIEW || - rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + /* + * If objsubid identifies a specific column, refer to that in error + * messages. Otherwise, search to see if there's a user column of the + * type. (We assume system columns are never of interesting types.) + * The search is needed because an index containing an expression + * column of the target type will just be recorded as a whole-relation + * dependency. If we do not find a column of the type, the dependency + * must indicate that the type is transiently referenced in an index + * expression but not stored on disk, which we assume is OK, just as + * we do for references in views. (It could also be that the target + * type is embedded in some container type that is stored in an index + * column, but the previous recursion should catch such cases.) + */ + if (pg_depend->objsubid > 0 && pg_depend->objsubid <= tupleDesc->natts) + att = TupleDescAttr(tupleDesc, pg_depend->objsubid - 1); + else + { + att = NULL; + for (int attno = 1; attno <= tupleDesc->natts; attno++) + { + att = TupleDescAttr(tupleDesc, attno - 1); + if (att->atttypid == typeOid && !att->attisdropped) + break; + att = NULL; + } + if (att == NULL) + { + /* No such column, so assume OK */ + relation_close(rel, AccessShareLock); + continue; + } + } + + /* + * We definitely should reject if the relation has storage. If it's + * partitioned, then perhaps we don't have to reject: if there are + * partitions then we'll fail when we find one, else there is no + * stored data to worry about. However, it's possible that the type + * change would affect conclusions about whether the type is sortable + * or hashable and thus (if it's a partitioning column) break the + * partitioning rule. For now, reject for partitioned rels too. 
+ */ + if (RELKIND_HAS_STORAGE(rel->rd_rel->relkind) || + rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE || + rel->rd_rel->relkind == RELKIND_PARTITIONED_INDEX) { if (origTypeName) ereport(ERROR, diff --git a/src/test/regress/expected/alter_table.out b/src/test/regress/expected/alter_table.out index cbc578e3586..9b203cbc4ec 100644 --- a/src/test/regress/expected/alter_table.out +++ b/src/test/regress/expected/alter_table.out @@ -3198,6 +3198,13 @@ CREATE TYPE test_type1 AS (a int, b text); CREATE TABLE test_tbl1 (x int, y test_type1); ALTER TYPE test_type1 ALTER ATTRIBUTE b TYPE varchar; -- fails ERROR: cannot alter type "test_type1" because column "test_tbl1.y" uses it +DROP TABLE test_tbl1; +CREATE TABLE test_tbl1 (x int, y text); +CREATE INDEX test_tbl1_idx ON test_tbl1((row(x,y)::test_type1)); +ALTER TYPE test_type1 ALTER ATTRIBUTE b TYPE varchar; -- fails +ERROR: cannot alter type "test_type1" because column "test_tbl1_idx.row" uses it +DROP TABLE test_tbl1; +DROP TYPE test_type1; CREATE TYPE test_type2 AS (a int, b text); CREATE TABLE test_tbl2 OF test_type2; CREATE TABLE test_tbl2_subclass () INHERITS (test_tbl2); @@ -3315,7 +3322,8 @@ Distributed by: (aa) Inherits: test_tbl2 Distributed by: (aa) -DROP TABLE test_tbl2_subclass; +DROP TABLE test_tbl2_subclass, test_tbl2; +DROP TYPE test_type2; CREATE TYPE test_typex AS (a int, b text); CREATE TABLE test_tblx (x int, y test_typex check ((y).a > 0)); ALTER TYPE test_typex DROP ATTRIBUTE a; -- fails diff --git a/src/test/regress/sql/alter_table.sql b/src/test/regress/sql/alter_table.sql index 9da0e5603ea..1c6fd62a117 100644 --- a/src/test/regress/sql/alter_table.sql +++ b/src/test/regress/sql/alter_table.sql @@ -2019,6 +2019,14 @@ CREATE TYPE test_type1 AS (a int, b text); CREATE TABLE test_tbl1 (x int, y test_type1); ALTER TYPE test_type1 ALTER ATTRIBUTE b TYPE varchar; -- fails +DROP TABLE test_tbl1; +CREATE TABLE test_tbl1 (x int, y text); +CREATE INDEX test_tbl1_idx ON test_tbl1((row(x,y)::test_type1)); +ALTER TYPE test_type1 ALTER ATTRIBUTE b TYPE varchar; -- fails + +DROP TABLE test_tbl1; +DROP TYPE test_type1; + CREATE TYPE test_type2 AS (a int, b text); CREATE TABLE test_tbl2 OF test_type2; CREATE TABLE test_tbl2_subclass () INHERITS (test_tbl2); @@ -2046,7 +2054,8 @@ ALTER TYPE test_type2 RENAME ATTRIBUTE a TO aa CASCADE; \d test_tbl2 \d test_tbl2_subclass -DROP TABLE test_tbl2_subclass; +DROP TABLE test_tbl2_subclass, test_tbl2; +DROP TYPE test_type2; CREATE TYPE test_typex AS (a int, b text); CREATE TABLE test_tblx (x int, y test_typex check ((y).a > 0)); From 2b155a04cbd81915c0e991f72d99c4baa430a8d5 Mon Sep 17 00:00:00 2001 From: Daniel Gustafsson Date: Mon, 27 Mar 2023 21:35:27 +0200 Subject: [PATCH 47/61] doc: Fix XML_CATALOG_FILES env var for Apple Silicon machines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Homebrew changed the prefix for Apple Silicon based machines, so our advice for XML_CATALOG_FILES needs to mention both. More info on the Homebrew change can be found at: https://github.com/Homebrew/brew/issues/9177 This is backpatch of commits 4c8d65408 and 5a91c7975, the latter which contained a small fix based on a report from Dagfinn Ilmari Mannsåker. 
Author: Julien Rouhaud Discussion: https://postgr.es/m/20230327082441.h7pa2vqiobbyo7rd@jrouhaud --- doc/src/sgml/docguide.sgml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/doc/src/sgml/docguide.sgml b/doc/src/sgml/docguide.sgml index 55ef6417749..2d8d5cd64f2 100644 --- a/doc/src/sgml/docguide.sgml +++ b/doc/src/sgml/docguide.sgml @@ -209,9 +209,13 @@ brew install docbook docbook-xsl libxslt fop The Homebrew-supplied programs require the following environment variable - to be set: + to be set. For Intel based machines, use this: export XML_CATALOG_FILES=/usr/local/etc/xml/catalog + + On Apple Silicon based machines, use this: + +export XML_CATALOG_FILES=/opt/homebrew/etc/xml/catalog Without it, xsltproc will throw errors like this: From f98f014860fd0f4ffdf1ae5f3db3536225fcd68c Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Tue, 28 Mar 2023 16:16:53 -0400 Subject: [PATCH 48/61] amcheck: In verify_heapam, allows tuples with xmin 0. Commit e88754a1965c0f40a723e6e46d670cacda9e19bd caused that case to be reported as corruption, but Peter Geoghegan pointed out that it can legitimately happen in the case of a speculative insertion that aborts, so we'd better not flag it as corruption after all. Back-patch to v14, like the commit that introduced the issue. Discussion: http://postgr.es/m/CAH2-WzmEabzcPTxSY-NXKH6Qt3FkAPYHGQSe2PtvGgj17ZQkCw@mail.gmail.com --- contrib/amcheck/verify_heapam.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/contrib/amcheck/verify_heapam.c b/contrib/amcheck/verify_heapam.c index cc49ccb26f6..1cda7b8f45c 100644 --- a/contrib/amcheck/verify_heapam.c +++ b/contrib/amcheck/verify_heapam.c @@ -775,8 +775,7 @@ check_tuple_visibility(HeapCheckContext *ctx) switch (get_xid_status(xmin, ctx, &xmin_status)) { case XID_INVALID: - report_corruption(ctx, - pstrdup("xmin is invalid")); + /* Could be the result of a speculative insertion that aborted. */ return false; case XID_BOUNDS_OK: break; From cd83e8232f418d1328c6b31e9c825bb6135cd382 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Wed, 29 Mar 2023 11:31:30 -0400 Subject: [PATCH 49/61] Fix dereference of dangling pointer in GiST index buffering build. gistBuildCallback tried to fetch the size of an index tuple that might have already been freed by gistProcessEmptyingQueue. While this seems to usually be harmless in production builds, in principle it could result in a SIGSEGV, or more likely a bogus value for indtuplesSize leading to poor page-split decisions later in the build. The memory management here is confusing and could stand to be refactored, but for the moment it seems to be enough to fetch the tuple size sooner. AFAICT the indtuples[Size] totals aren't used in between these places; even if they were, the updated values shouldn't be any worse to use. So just move the incrementing of the totals up. It's not very clear why our valgrind-using buildfarm animals haven't noticed this problem, because the relevant code path does seem to be exercised according to the code coverage report. I think the reason that we didn't fix this bug after the first report is that I'd wanted to try to understand that better. However, now that it's been re-discovered let's just be pragmatic and fix it already. Original report by Alexander Lakhin (bug #16329), later rediscovered by Egor Chindyaskin (bug #17874). Patch by Alexander Lakhin (commentary by Pavel Borisov and me). Back-patch to all supported branches. 
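The shape of the fix is the usual one for this class of bug: read
everything needed from an allocation before calling anything that may
reset the memory context it lives in.  Schematically (a sketch of the
reordering, not the gistbuild.c hunk verbatim):

    IndexTuple  itup;

    itup = gistFormTuple(buildstate->giststate, index, values, isnull, true);
    itup->t_tid = *tid;                 /* itup lives in tempCxt */

    /* Capture everything we need from the tuple now ... */
    buildstate->indtuples += 1;
    buildstate->indtuplesSize += IndexTupleSize(itup);

    /*
     * ... because the buffering path below can reach
     * gistProcessEmptyingQueue(), which resets tempCxt and frees itup.
     */
    gistBufferingBuildInsert(buildstate, itup);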
Discussion: https://postgr.es/m/16329-7a6aa9b6fa1118a1@postgresql.org Discussion: https://postgr.es/m/17874-63ca6c7ce42d2103@postgresql.org --- src/backend/access/gist/gistbuild.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/backend/access/gist/gistbuild.c b/src/backend/access/gist/gistbuild.c index d281d89000f..907cc83857f 100644 --- a/src/backend/access/gist/gistbuild.c +++ b/src/backend/access/gist/gistbuild.c @@ -844,6 +844,19 @@ gistBuildCallback(Relation index, true); itup->t_tid = *tid; + /* Update tuple count and total size. */ + buildstate->indtuples += 1; + buildstate->indtuplesSize += IndexTupleSize(itup); + + /* + * XXX In buffering builds, the tempCxt is also reset down inside + * gistProcessEmptyingQueue(). This is not great because it risks + * confusion and possible use of dangling pointers (for example, itup + * might be already freed when control returns here). It's generally + * better that a memory context be "owned" by only one function. However, + * currently this isn't causing issues so it doesn't seem worth the amount + * of refactoring that would be needed to avoid it. + */ if (buildstate->buildMode == GIST_BUFFERING_ACTIVE) { /* We have buffers, so use them. */ @@ -859,10 +872,6 @@ gistBuildCallback(Relation index, buildstate->giststate, buildstate->heaprel, true); } - /* Update tuple count and total size. */ - buildstate->indtuples += 1; - buildstate->indtuplesSize += IndexTupleSize(itup); - MemoryContextSwitchTo(oldCtx); MemoryContextReset(buildstate->giststate->tempCxt); From 4544859f732c4c98b711433f0e03e85c60a4b129 Mon Sep 17 00:00:00 2001 From: David Rowley Date: Fri, 31 Mar 2023 12:14:04 +1300 Subject: [PATCH 50/61] Fix List memory issue in transformColumnDefinition When calling generateSerialExtraStmts(), we would pass in the constraint->options. In some cases, generateSerialExtraStmts() would modify the referenced List to remove elements from it, but doing so is invalid without assigning the list back to all variables that point to it. In the particular reported problem case, the List became empty, in which cases it became NIL, but the passed in constraint->options didn't get to find out about that and was left pointing to free'd memory. To fix this, just perform a list_copy() inside generateSerialExtraStmts(). We could just do a list_copy() just before we perform the delete from the list, however, that seems less robust. Let's make sure the generated CreateSeqStmt gets a completely different copy of the list to be safe. Bug: #17879 Reported-by: Fei Changhong Diagnosed-by: Fei Changhong Discussion: https://postgr.es/m/17879-b7dfb5debee58ff5@postgresql.org Backpatch-through: 11, all supported versions --- src/backend/parser/parse_utilcmd.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/backend/parser/parse_utilcmd.c b/src/backend/parser/parse_utilcmd.c index 223c6fcb48b..358ac56da30 100644 --- a/src/backend/parser/parse_utilcmd.c +++ b/src/backend/parser/parse_utilcmd.c @@ -476,6 +476,9 @@ generateSerialExtraStmts(CreateStmtContext *cxt, ColumnDef *column, int nameEl_idx = -1; + /* Make a copy of this as we may end up modifying it in the code below */ + seqoptions = list_copy(seqoptions); + /* * Determine namespace and name to use for the sequence. * From 316dc347264bb20b3f7d829f6fdedf5abd4b1769 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 31 Mar 2023 10:08:40 -0400 Subject: [PATCH 51/61] Ensure acquire_inherited_sample_rows sets its output parameters. 
The totalrows/totaldeadrows outputs were left uninitialized in cases where we find no analyzable child tables of a partitioned table. This could lead to setting the partitioned table's pg_class.reltuples value to garbage. It's not clear that that would have any very bad effects in practice, but fix it anyway because it's making valgrind unhappy. Reported and diagnosed by Alexander Lakhin (bug #17880). Discussion: https://postgr.es/m/17880-9282037c923d856e@postgresql.org --- src/backend/commands/analyze.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index 00fc25b2439..8d329d44503 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -2007,6 +2007,10 @@ acquire_inherited_sample_rows(Relation onerel, int elevel, ListCell *lc; bool has_child; + /* Initialize output parameters to zero now, in case we exit early */ + *totalrows = 0; + *totaldeadrows = 0; + /* * Find all members of inheritance set. We only need AccessShareLock on * the children. @@ -2158,8 +2162,6 @@ acquire_inherited_sample_rows(Relation onerel, int elevel, pgstat_progress_update_param(PROGRESS_ANALYZE_CHILD_TABLES_TOTAL, nrels); numrows = 0; - *totalrows = 0; - *totaldeadrows = 0; for (i = 0; i < nrels; i++) { Relation childrel = rels[i]; From c8f8915363bb97b7183fe6f64e5552938999cbc3 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 31 Mar 2023 11:18:49 -0400 Subject: [PATCH 52/61] Reject system columns as elements of foreign keys. Up through v11 it was sensible to use the "oid" system column as a foreign key column, but since that was removed there's no visible usefulness in making any of the remaining system columns a foreign key. Moreover, since the TupleTableSlot rewrites in v12, such cases actively fail because of implicit assumptions that only user columns appear in foreign keys. The lack of complaints about that seems like good evidence that no one is trying to do it. Hence, rather than trying to repair those assumptions (of which there are at least two, maybe more), let's just forbid the case up front. Per this patch, a system column in either the referenced or referencing side of a foreign key will draw this error; however, putting one in the referenced side would have failed later anyway, since we don't allow unique indexes to be made on system columns. Per bug #17877 from Alexander Lakhin. Back-patch to v12; the case still appears to work in v11, so we shouldn't break it there. Discussion: https://postgr.es/m/17877-4bcc658e33df6de1@postgresql.org --- src/backend/commands/tablecmds.c | 15 +++++++++++++-- src/test/regress/expected/foreign_key.out | 11 ++++++----- src/test/regress/sql/foreign_key.sql | 7 ++++--- 3 files changed, 23 insertions(+), 10 deletions(-) diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 9bdebc6e29b..34c910565a9 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -12893,6 +12893,11 @@ ATExecValidateConstraint(List **wqueue, Relation rel, char *constrName, * transformColumnNameList - transform list of column names * * Lookup each name and return its attnum and type OID + * + * Note: the name of this function suggests that it's general-purpose, + * but actually it's only used to look up names appearing in foreign-key + * clauses. The error messages would need work to use it in other cases, + * and perhaps the validity checks as well. 
*/ static int transformColumnNameList(Oid relId, List *colList, @@ -12906,6 +12911,7 @@ transformColumnNameList(Oid relId, List *colList, { char *attname = strVal(lfirst(l)); HeapTuple atttuple; + Form_pg_attribute attform; atttuple = SearchSysCacheAttName(relId, attname); if (!HeapTupleIsValid(atttuple)) @@ -12913,13 +12919,18 @@ transformColumnNameList(Oid relId, List *colList, (errcode(ERRCODE_UNDEFINED_COLUMN), errmsg("column \"%s\" referenced in foreign key constraint does not exist", attname))); + attform = (Form_pg_attribute) GETSTRUCT(atttuple); + if (attform->attnum < 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("system columns cannot be used in foreign keys"))); if (attnum >= INDEX_MAX_KEYS) ereport(ERROR, (errcode(ERRCODE_TOO_MANY_COLUMNS), errmsg("cannot have more than %d keys in a foreign key", INDEX_MAX_KEYS))); - attnums[attnum] = ((Form_pg_attribute) GETSTRUCT(atttuple))->attnum; - atttypids[attnum] = ((Form_pg_attribute) GETSTRUCT(atttuple))->atttypid; + attnums[attnum] = attform->attnum; + atttypids[attnum] = attform->atttypid; ReleaseSysCache(atttuple); attnum++; } diff --git a/src/test/regress/expected/foreign_key.out b/src/test/regress/expected/foreign_key.out index 8ea6434e222..ce86e400bcb 100644 --- a/src/test/regress/expected/foreign_key.out +++ b/src/test/regress/expected/foreign_key.out @@ -757,15 +757,16 @@ SELECT * from FKTABLE; DROP TABLE FKTABLE; DROP TABLE PKTABLE; -CREATE TABLE PKTABLE (ptest1 int PRIMARY KEY); +-- Test some invalid FK definitions +CREATE TABLE PKTABLE (ptest1 int PRIMARY KEY, someoid oid); CREATE TABLE FKTABLE_FAIL1 ( ftest1 int, CONSTRAINT fkfail1 FOREIGN KEY (ftest2) REFERENCES PKTABLE); ERROR: column "ftest2" referenced in foreign key constraint does not exist CREATE TABLE FKTABLE_FAIL2 ( ftest1 int, CONSTRAINT fkfail1 FOREIGN KEY (ftest1) REFERENCES PKTABLE(ptest2)); ERROR: column "ptest2" referenced in foreign key constraint does not exist -DROP TABLE FKTABLE_FAIL1; -ERROR: table "fktable_fail1" does not exist -DROP TABLE FKTABLE_FAIL2; -ERROR: table "fktable_fail2" does not exist +CREATE TABLE FKTABLE_FAIL3 ( ftest1 int, CONSTRAINT fkfail1 FOREIGN KEY (tableoid) REFERENCES PKTABLE(someoid)); +ERROR: system columns cannot be used in foreign keys +CREATE TABLE FKTABLE_FAIL4 ( ftest1 oid, CONSTRAINT fkfail1 FOREIGN KEY (ftest1) REFERENCES PKTABLE(tableoid)); +ERROR: system columns cannot be used in foreign keys DROP TABLE PKTABLE; -- Test for referencing column number smaller than referenced constraint CREATE TABLE PKTABLE (ptest1 int, ptest2 int, UNIQUE(ptest1, ptest2)); diff --git a/src/test/regress/sql/foreign_key.sql b/src/test/regress/sql/foreign_key.sql index a8c9e77b800..426d10bf861 100644 --- a/src/test/regress/sql/foreign_key.sql +++ b/src/test/regress/sql/foreign_key.sql @@ -463,12 +463,13 @@ SELECT * from FKTABLE; DROP TABLE FKTABLE; DROP TABLE PKTABLE; -CREATE TABLE PKTABLE (ptest1 int PRIMARY KEY); +-- Test some invalid FK definitions +CREATE TABLE PKTABLE (ptest1 int PRIMARY KEY, someoid oid); CREATE TABLE FKTABLE_FAIL1 ( ftest1 int, CONSTRAINT fkfail1 FOREIGN KEY (ftest2) REFERENCES PKTABLE); CREATE TABLE FKTABLE_FAIL2 ( ftest1 int, CONSTRAINT fkfail1 FOREIGN KEY (ftest1) REFERENCES PKTABLE(ptest2)); +CREATE TABLE FKTABLE_FAIL3 ( ftest1 int, CONSTRAINT fkfail1 FOREIGN KEY (tableoid) REFERENCES PKTABLE(someoid)); +CREATE TABLE FKTABLE_FAIL4 ( ftest1 oid, CONSTRAINT fkfail1 FOREIGN KEY (ftest1) REFERENCES PKTABLE(tableoid)); -DROP TABLE FKTABLE_FAIL1; -DROP TABLE FKTABLE_FAIL2; DROP TABLE PKTABLE; 
-- Test for referencing column number smaller than referenced constraint From 5e43f84cb184c0e08a01d7bdf3d1602e9d44d52d Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Wed, 5 Apr 2023 07:59:52 +0900 Subject: [PATCH 53/61] doc: Add more details about pg_stat_get_xact_blocks_{fetched,hit} The explanation describing the dependency to system read() calls for these two functions has been removed in ddfc2d9. And after more discussion about d69c404, we have concluded that adding more details makes them easier to understand. While on it, use the term "block read requests" (maybe found in cache) rather than "buffers fetched" and "buffer hits". Per discussion with Melanie Plageman, Kyotaro Horiguchi, Bertrand Drouvot and myself. Discussion: https://postgr.es/m/CAAKRu_ZmdiScT4q83OAbfmR5AH-L5zWya3SXjaxiJvhCob-e2A@mail.gmail.com Backpatch-through: 11 --- doc/src/sgml/monitoring.sgml | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index 3c56f9fa4f1..44239b26775 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -5060,8 +5060,11 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i bigint - Returns the number of buffers fetched for table or index, in the current - transaction. + Returns the number of block read requests for table or index, in the + current transaction. This number minus + pg_stat_get_xact_blocks_hit gives the number of + kernel read() calls; the number of actual + physical reads is usually lower due to kernel-level buffering. @@ -5074,8 +5077,9 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i bigint - Returns the number of buffer hits for table or index, in the current - transaction. + Returns the number of block read requests for table or index, in the + current transaction, found in cache (not triggering kernel + read() calls). From 4663ccf7dff35e322ebcaf37972e788e8938ff68 Mon Sep 17 00:00:00 2001 From: John Naylor Date: Wed, 5 Apr 2023 14:16:19 +0700 Subject: [PATCH 54/61] doc: Update error messages in RLS examples Since 8b9e9644d, the messages for failed permissions checks report "table" where appropriate, rather than "relation". 
Backpatch to all supported branches --- doc/src/sgml/ddl.sgml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/src/sgml/ddl.sgml b/doc/src/sgml/ddl.sgml index 78f4448a5bc..53ca87cc020 100644 --- a/doc/src/sgml/ddl.sgml +++ b/doc/src/sgml/ddl.sgml @@ -2457,7 +2457,7 @@ postgres=> table passwd; postgres=> set role alice; SET postgres=> table passwd; -ERROR: permission denied for relation passwd +ERROR: permission denied for table passwd postgres=> select user_name,real_name,home_phone,extra_info,home_dir,shell from passwd; user_name | real_name | home_phone | extra_info | home_dir | shell -----------+-----------+--------------+------------+-------------+----------- @@ -2467,7 +2467,7 @@ postgres=> select user_name,real_name,home_phone,extra_info,home_dir,shell fr (3 rows) postgres=> update passwd set user_name = 'joe'; -ERROR: permission denied for relation passwd +ERROR: permission denied for table passwd -- Alice is allowed to change her own real_name, but no others postgres=> update passwd set real_name = 'Alice Doe'; UPDATE 1 @@ -2476,9 +2476,9 @@ UPDATE 0 postgres=> update passwd set shell = '/bin/xx'; ERROR: new row violates WITH CHECK OPTION for "passwd" postgres=> delete from passwd; -ERROR: permission denied for relation passwd +ERROR: permission denied for table passwd postgres=> insert into passwd (user_name) values ('xxx'); -ERROR: permission denied for relation passwd +ERROR: permission denied for table passwd -- Alice can change her own password; RLS silently prevents updating other rows postgres=> update passwd set pwhash = 'abc'; UPDATE 1 From 5d1c6c21c055151264aa668a46f94dd1dcb7177b Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Thu, 6 Apr 2023 15:52:37 -0400 Subject: [PATCH 55/61] Fix ts_headline() edge cases for empty query and empty search text. tsquery's GETQUERY() macro is only safe to apply to a tsquery that is known non-empty; otherwise it gives a pointer to garbage. Before commit 5a617d75d, ts_headline() avoided this pitfall, but only in a very indirect, nonobvious way. (hlCover could not reach its TS_execute call, because if the query contains no lexemes then hlFirstIndex would surely return -1.) After that commit, it fell into the trap, resulting in weird errors such as "unrecognized operator" and/or valgrind complaints. In HEAD, fix this by not calling TS_execute_locations() at all for an empty query. In the back branches, add a defensive check to hlCover() --- that's not fixing any live bug, but I judge the code a bit too fragile as-is. Also, both mark_hl_fragments() and mark_hl_words() were careless about the possibility of empty search text: in the cases where no match has been found, they'd end up telling mark_fragment() to mark from word indexes 0 to 0 inclusive, even when there is no word 0. This is harmless since we over-allocated the prs->words array, but it does annoy valgrind. Fix so that the end index is -1 and thus mark_fragment() will do nothing in such cases. Bottom line is that this fixes a live bug in HEAD, but in the back branches it's only getting rid of a valgrind nitpick. Back-patch anyway. Per report from Alexander Lakhin. 
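The underlying contract is simple to state in code; this sketch shows the
defensive check added to hlCover() together with the reason it is needed
(the GETQUERY() line is illustrative context, not part of the patch):

    QueryItem  *items;

    if (query->size <= 0)
        return false;           /* empty query matches nothing */

    /*
     * Only now is GETQUERY() meaningful: for an empty tsquery it points
     * at garbage, since there are no items to point at.
     */
    items = GETQUERY(query);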
Discussion: https://postgr.es/m/c27f642d-020b-01ff-ae61-086af287c4fd@gmail.com --- src/backend/tsearch/wparser_def.c | 8 ++++++-- src/test/regress/expected/tsearch.out | 21 +++++++++++++++++++++ src/test/regress/sql/tsearch.sql | 6 ++++++ 3 files changed, 33 insertions(+), 2 deletions(-) diff --git a/src/backend/tsearch/wparser_def.c b/src/backend/tsearch/wparser_def.c index a3e5baf9782..98029e78fb4 100644 --- a/src/backend/tsearch/wparser_def.c +++ b/src/backend/tsearch/wparser_def.c @@ -2039,6 +2039,9 @@ hlCover(HeadlineParsedText *prs, TSQuery query, int max_cover, nextpmax; hlCheck ch; + if (query->size <= 0) + return false; /* empty query matches nothing */ + /* * We look for the earliest, shortest substring of prs->words that * satisfies the query. Both the pmin and pmax indices must be words @@ -2343,7 +2346,8 @@ mark_hl_fragments(HeadlineParsedText *prs, TSQuery query, bool highlightall, /* show the first min_words words if we have not marked anything */ if (num_f <= 0) { - startpos = endpos = curlen = 0; + startpos = curlen = 0; + endpos = -1; for (i = 0; i < prs->curwords && curlen < min_words; i++) { if (!NONWORDTOKEN(prs->words[i].type)) @@ -2498,7 +2502,7 @@ mark_hl_words(HeadlineParsedText *prs, TSQuery query, bool highlightall, if (bestlen < 0) { curlen = 0; - pose = 0; + pose = -1; for (i = 0; i < prs->curwords && curlen < min_words; i++) { if (!NONWORDTOKEN(prs->words[i].type)) diff --git a/src/test/regress/expected/tsearch.out b/src/test/regress/expected/tsearch.out index 27e71bb15d6..5f712a5484e 100644 --- a/src/test/regress/expected/tsearch.out +++ b/src/test/regress/expected/tsearch.out @@ -2012,6 +2012,27 @@ to_tsquery('english','Lorem') && phraseto_tsquery('english','ullamcorper urna'), Lorem ipsum urna. Nullam nullam ullamcorper urna (1 row) +-- Edge cases with empty query +SELECT ts_headline('english', +'', ''::tsquery); +NOTICE: text-search query doesn't contain lexemes: "" +LINE 2: '', ''::tsquery); + ^ + ts_headline +------------- + +(1 row) + +SELECT ts_headline('english', +'foo bar', ''::tsquery); +NOTICE: text-search query doesn't contain lexemes: "" +LINE 2: 'foo bar', ''::tsquery); + ^ + ts_headline +------------- + foo bar +(1 row) + --Rewrite sub system CREATE TABLE test_tsquery (txtkeyword TEXT, txtsample TEXT); \set ECHO none diff --git a/src/test/regress/sql/tsearch.sql b/src/test/regress/sql/tsearch.sql index af5004602a8..80a3e0558b9 100644 --- a/src/test/regress/sql/tsearch.sql +++ b/src/test/regress/sql/tsearch.sql @@ -550,6 +550,12 @@ SELECT ts_headline('english', to_tsquery('english','Lorem') && phraseto_tsquery('english','ullamcorper urna'), 'MaxFragments=100, MaxWords=100, MinWords=1'); +-- Edge cases with empty query +SELECT ts_headline('english', +'', ''::tsquery); +SELECT ts_headline('english', +'foo bar', ''::tsquery); + --Rewrite sub system CREATE TABLE test_tsquery (txtkeyword TEXT, txtsample TEXT); From 3196a4f9ea24df654fb63b7037313007f73db311 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Thu, 6 Apr 2023 18:13:49 -0400 Subject: [PATCH 56/61] Stabilize just-added regression test cases. The tests added by commits 029dea882 et al turn out to produce different output under -DRANDOMIZE_ALLOCATED_MEMORY. This is not a bug exactly: that flag causes coerce_type() to invoke the input function twice when coercing an unknown-type literal to a specific type. So you get tsqueryin's bleat about an empty tsquery twice. Revise the test query to avoid that. 
Discussion: https://postgr.es/m/20230406213813.uep7plg6lvcywujo@awork3.anarazel.de --- src/test/regress/expected/tsearch.out | 8 ++------ src/test/regress/sql/tsearch.sql | 4 ++-- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/src/test/regress/expected/tsearch.out b/src/test/regress/expected/tsearch.out index 5f712a5484e..36ae94a5255 100644 --- a/src/test/regress/expected/tsearch.out +++ b/src/test/regress/expected/tsearch.out @@ -2014,20 +2014,16 @@ to_tsquery('english','Lorem') && phraseto_tsquery('english','ullamcorper urna'), -- Edge cases with empty query SELECT ts_headline('english', -'', ''::tsquery); +'', to_tsquery('english', '')); NOTICE: text-search query doesn't contain lexemes: "" -LINE 2: '', ''::tsquery); - ^ ts_headline ------------- (1 row) SELECT ts_headline('english', -'foo bar', ''::tsquery); +'foo bar', to_tsquery('english', '')); NOTICE: text-search query doesn't contain lexemes: "" -LINE 2: 'foo bar', ''::tsquery); - ^ ts_headline ------------- foo bar diff --git a/src/test/regress/sql/tsearch.sql b/src/test/regress/sql/tsearch.sql index 80a3e0558b9..e7e2aa907f7 100644 --- a/src/test/regress/sql/tsearch.sql +++ b/src/test/regress/sql/tsearch.sql @@ -552,9 +552,9 @@ to_tsquery('english','Lorem') && phraseto_tsquery('english','ullamcorper urna'), -- Edge cases with empty query SELECT ts_headline('english', -'', ''::tsquery); +'', to_tsquery('english', '')); SELECT ts_headline('english', -'foo bar', ''::tsquery); +'foo bar', to_tsquery('english', '')); --Rewrite sub system From b103dcc04389538acca42279bf03205266c08d7b Mon Sep 17 00:00:00 2001 From: Stephen Frost Date: Fri, 7 Apr 2023 19:36:12 -0400 Subject: [PATCH 57/61] For Kerberos testing, disable reverse DNS lookup In our Kerberos test suite, there isn't much need to worry about the normal canonicalization that Kerberos provides by looking up the reverse DNS for the IP address connected to, and in some cases it can actively cause problems (eg: a captive portal wifi where the normally not resolvable localhost address used ends up being resolved anyway, and not to the domain we are using for testing, causing the entire regression test to fail with errors about not being able to get a TGT for the remote realm for cross-realm trust). Therefore, disable it by adding rdns = false into the krb5.conf that's generated for the test. Reviewed-By: Heikki Linnakangas Discussion: https://postgr.es/m/Y/QD2zDkDYQA1GQt@tamriel.snowman.net --- src/test/kerberos/t/001_auth.pl | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/test/kerberos/t/001_auth.pl b/src/test/kerberos/t/001_auth.pl index bd55339c761..b2a733f90fa 100644 --- a/src/test/kerberos/t/001_auth.pl +++ b/src/test/kerberos/t/001_auth.pl @@ -93,6 +93,17 @@ or BAIL_OUT("could not get Kerberos version"); $krb5_version = $1; +# Build the krb5.conf to use. +# +# Explicitly specify the default (test) realm and the KDC for +# that realm to avoid the Kerberos library trying to look up +# that information in DNS, and also because we're using a +# non-standard KDC port. +# +# Reverse DNS is explicitly disabled to avoid any issue with a +# captive portal or other cases where the reverse DNS succeeds +# and the Kerberos library uses that as the canonical name of +# the host and then tries to acquire a cross-realm ticket. 
append_to_file( $krb5_conf, qq![logging] @@ -101,6 +112,7 @@ [libdefaults] default_realm = $realm +rdns = false [realms] $realm = { From fd27f65690f8cd3b58636b1aae69d6419f95f0ec Mon Sep 17 00:00:00 2001 From: Stephen Frost Date: Fri, 7 Apr 2023 19:36:12 -0400 Subject: [PATCH 58/61] For Kerberos testing, disable DNS lookups Similar to 8dff2f224, this disables DNS lookups by the Kerberos library to look up the KDC and the realm while the Kerberos tests are running. In some environments, these lookups can take a long time and end up timing out and causing tests to fail. Further, since this isn't really our domain, we shouldn't be sending out these DNS requests during our tests. --- src/test/kerberos/t/001_auth.pl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/test/kerberos/t/001_auth.pl b/src/test/kerberos/t/001_auth.pl index b2a733f90fa..6ced0b1faf4 100644 --- a/src/test/kerberos/t/001_auth.pl +++ b/src/test/kerberos/t/001_auth.pl @@ -100,6 +100,11 @@ # that information in DNS, and also because we're using a # non-standard KDC port. # +# Also explicitly disable DNS lookups since this isn't really +# our domain and we shouldn't be causing random DNS requests +# to be sent out (not to mention that broken DNS environments +# can cause the tests to take an extra long time and timeout). +# # Reverse DNS is explicitly disabled to avoid any issue with a # captive portal or other cases where the reverse DNS succeeds # and the Kerberos library uses that as the canonical name of @@ -111,6 +116,8 @@ kdc = FILE:$kdc_log [libdefaults] +dns_lookup_realm = false +dns_lookup_kdc = false default_realm = $realm rdns = false From 80337718ae226884b253a9c8f40c2ed69085162e Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Mon, 10 Apr 2023 13:09:18 -0400 Subject: [PATCH 59/61] Doc: adjust examples of EXTRACT() output to match current reality. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit EXTRACT(EPOCH), EXTRACT(SECOND), and some related cases print more trailing zeroes than they used to. This behavior change happened with commit a2da77cdb (Change return type of EXTRACT to numeric), and it was intentional according to the commit log: - Return values when extracting fields with possibly fractional values, such as second and epoch, now have the full scale that the value has internally (so, for example, '1.000000' instead of just '1'). It's been like that for two releases now, so while I suggested changing this back, it's probably better to adjust the documentation examples. Per bug #17866 from Евгений Жужнев. Back-patch to v14 where the change came in. 
Discussion: https://postgr.es/m/17866-18eb70095b1594e2@postgresql.org --- doc/src/sgml/func.sgml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index ba8bccc135d..a7b91c033a4 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -9389,11 +9389,11 @@ SELECT timestamp with time zone '2005-04-02 12:00:00-07' + interval '24 hours'; SELECT EXTRACT(EPOCH FROM timestamptz '2013-07-01 12:00:00') - EXTRACT(EPOCH FROM timestamptz '2013-03-01 12:00:00'); -Result: 10537200 +Result: 10537200.000000 SELECT (EXTRACT(EPOCH FROM timestamptz '2013-07-01 12:00:00') - EXTRACT(EPOCH FROM timestamptz '2013-03-01 12:00:00')) / 60 / 60 / 24; -Result: 121.958333333333 +Result: 121.9583333333333333 SELECT timestamptz '2013-07-01 12:00:00' - timestamptz '2013-03-01 12:00:00'; Result: 121 days 23:00:00 SELECT age(timestamptz '2013-07-01 12:00:00', timestamptz '2013-03-01 12:00:00'); @@ -9539,13 +9539,13 @@ SELECT EXTRACT(DOY FROM TIMESTAMP '2001-02-16 20:38:40'); SELECT EXTRACT(EPOCH FROM TIMESTAMP WITH TIME ZONE '2001-02-16 20:38:40.12-08'); -Result: 982384720.12 +Result: 982384720.120000 SELECT EXTRACT(EPOCH FROM TIMESTAMP '2001-02-16 20:38:40.12'); -Result: 982355920.12 +Result: 982355920.120000 SELECT EXTRACT(EPOCH FROM INTERVAL '5 days 3 hours'); -Result: 442800 +Result: 442800.000000 @@ -9692,7 +9692,7 @@ SELECT EXTRACT(MILLENNIUM FROM TIMESTAMP '2001-02-16 20:38:40'); SELECT EXTRACT(MILLISECONDS FROM TIME '17:12:28.5'); -Result: 28500 +Result: 28500.000 @@ -9756,10 +9756,10 @@ SELECT EXTRACT(QUARTER FROM TIMESTAMP '2001-02-16 20:38:40'); SELECT EXTRACT(SECOND FROM TIMESTAMP '2001-02-16 20:38:40'); -Result: 40 +Result: 40.000000 SELECT EXTRACT(SECOND FROM TIME '17:12:28.5'); -Result: 28.5 +Result: 28.500000 From d4a2acfc28f10ffc5009204669f7cb3147eb61db Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Mon, 10 Apr 2023 15:49:48 -0400 Subject: [PATCH 60/61] Doc: add missed entries in BRIN extensibility tables. The tables in "71.3. Extensibility" listing the support functions for bloom and minmax-multi opclasses should include the associated options function. While this isn't quite as required as the rest, you need it for full functionality of the opclass. Back-patch to v14 where these functions were added. --- doc/src/sgml/brin.sgml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/doc/src/sgml/brin.sgml b/doc/src/sgml/brin.sgml index 71697155d7c..9c5ffcddf84 100644 --- a/doc/src/sgml/brin.sgml +++ b/doc/src/sgml/brin.sgml @@ -1228,6 +1228,10 @@ typedef struct BrinOpcInfo Support Procedure 4 internal function brin_bloom_union() + + Support Procedure 5 + internal function brin_bloom_options() + Support Procedure 11 function to compute hash of an element @@ -1286,6 +1290,10 @@ typedef struct BrinOpcInfo Support Procedure 4 internal function brin_minmax_multi_union() + + Support Procedure 5 + internal function brin_minmax_multi_options() + Support Procedure 11 function to compute distance between two values (length of a range) From 4ebcaf011f555b046a4e39d0844e7a786bbe0435 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Wed, 12 Apr 2023 09:09:58 +0900 Subject: [PATCH 61/61] Fix detection of unseekable files for fseek() and ftello() with MSVC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Calling fseek() or ftello() on a handle to a non-seeking device such as a pipe or a communications device is not supported. 
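(As a minimal, hypothetical sketch, not code from this patch and
assuming a Windows/MSVC build: the underlying OS handle of a stdio
stream can be classified with GetFileType(), which is the mechanism
this commit relies on.)

    #include <stdio.h>
    #include <io.h>
    #include <windows.h>

    /* Hypothetical helper: report whether a stdio stream can seek. */
    static int
    stream_is_seekable(FILE *stream)
    {
        HANDLE h = (HANDLE) _get_osfhandle(_fileno(stream));

        /* Only disk files can seek; pipes and character devices cannot. */
        return GetFileType(h) == FILE_TYPE_DISK;
    }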
Unfortunately, MSVC's flavor of these routines, _fseeki64() and
_ftelli64(), does not return an error when given a pipe as handle.
Some of the logic in pg_dump and pg_restore relies on these routines
to check whether a handle is seekable, causing failures when the
output of pg_dump is passed to pg_restore through a pipe, for example.

This commit introduces wrappers for fseeko() and ftello() on MSVC so
that callers can properly detect non-seekable handles.  This relies
mainly on GetFileType(), sharing a bit of code with the MSVC port for
fstat().  The code in charge of getting a file type is refactored into
a new file called win32common.c, shared by win32stat.c and the new
win32fseek.c, which contains the MSVC ports for fseeko() and ftello().

Like 765f5df, this is backpatched down to 14, where the fstat()
implementation for MSVC is able to handle files larger than 4GB in
size.  Writing a TAP test for this has proven tricky, as IPC::Run
handles the pipes by itself, but I have been able to verify the fix
manually.

Reported-by: Daniel Watzinger
Author: Juan José Santamaría Flecha, Michael Paquier
Discussion: https://postgr.es/m/CAC+AXB26a4EmxM2suXxPpJaGrqAdxracd7hskLg-zxtPB50h7A@mail.gmail.com
Backpatch-through: 14
---
 configure | 6 +++
 configure.ac | 1 +
 src/include/port/win32_port.h | 12 ++++--
 src/port/win32common.c | 68 +++++++++++++++++++++++++++++++
 src/port/win32fseek.c | 75 +++++++++++++++++++++++++++++++++++
 src/port/win32stat.c | 22 ++--------
 src/tools/msvc/Mkvcbuild.pm | 3 +-
 7 files changed, 164 insertions(+), 23 deletions(-)
 create mode 100644 src/port/win32common.c
 create mode 100644 src/port/win32fseek.c

diff --git a/configure b/configure
index 99a9961f025..03fac094065 100755
--- a/configure
+++ b/configure
@@ -20405,6 +20405,12 @@ esac
  ;;
 esac
 
+  case " $LIBOBJS " in
+  *" win32common.$ac_objext "* ) ;;
+  *) LIBOBJS="$LIBOBJS win32common.$ac_objext"
+ ;;
+esac
+
   case " $LIBOBJS " in
   *" win32env.$ac_objext "* ) ;;
   *) LIBOBJS="$LIBOBJS win32env.$ac_objext"

diff --git a/configure.ac b/configure.ac
index a50ca5710c4..5bef7d6dbaa 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2520,6 +2520,7 @@ if test "$PORTNAME" = "win32"; then
   AC_LIBOBJ(kill)
   AC_LIBOBJ(open)
   AC_LIBOBJ(system)
+  AC_LIBOBJ(win32common)
   AC_LIBOBJ(win32env)
   AC_LIBOBJ(win32error)
   AC_LIBOBJ(win32security)

diff --git a/src/include/port/win32_port.h b/src/include/port/win32_port.h
index 05c5a534420..41427a9c1af 100644
--- a/src/include/port/win32_port.h
+++ b/src/include/port/win32_port.h
@@ -193,15 +193,21 @@ struct itimerval
 int setitimer(int which, const struct itimerval *value, struct itimerval *ovalue);
 
+/* Convenience wrapper for GetFileType() */
+extern DWORD pgwin32_get_file_type(HANDLE hFile);
+
 /*
  * WIN32 does not provide 64-bit off_t, but does provide the functions operating
- * with 64-bit offsets.
+ * with 64-bit offsets.  Also, fseek() might not give an error for unseekable
+ * streams, so harden that function with our version.
*/ #define pgoff_t __int64 #ifdef _MSC_VER -#define fseeko(stream, offset, origin) _fseeki64(stream, offset, origin) -#define ftello(stream) _ftelli64(stream) +extern int _pgfseeko64(FILE *stream, pgoff_t offset, int origin); +extern pgoff_t _pgftello64(FILE *stream); +#define fseeko(stream, offset, origin) _pgfseeko64(stream, offset, origin) +#define ftello(stream) _pgftello64(stream) #else #ifndef fseeko #define fseeko(stream, offset, origin) fseeko64(stream, offset, origin) diff --git a/src/port/win32common.c b/src/port/win32common.c new file mode 100644 index 00000000000..2fd78f7f936 --- /dev/null +++ b/src/port/win32common.c @@ -0,0 +1,68 @@ +/*------------------------------------------------------------------------- + * + * win32common.c + * Common routines shared among the win32*.c ports. + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/port/win32common.c + * + *------------------------------------------------------------------------- + */ + +#ifdef FRONTEND +#include "postgres_fe.h" +#else +#include "postgres.h" +#endif + +#ifdef WIN32 + +/* + * pgwin32_get_file_type + * + * Convenience wrapper for GetFileType() with specific error handling for all the + * port implementations. Returns the file type associated with a HANDLE. + * + * On error, sets errno with FILE_TYPE_UNKNOWN as file type. + */ +DWORD +pgwin32_get_file_type(HANDLE hFile) +{ + DWORD fileType = FILE_TYPE_UNKNOWN; + DWORD lastError; + + errno = 0; + + /* + * When stdin, stdout, and stderr aren't associated with a stream the + * special value -2 is returned: + * https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/get-osfhandle + */ + if (hFile == INVALID_HANDLE_VALUE || hFile == (HANDLE) -2) + { + errno = EINVAL; + return FILE_TYPE_UNKNOWN; + } + + fileType = GetFileType(hFile); + lastError = GetLastError(); + + /* + * Invoke GetLastError in order to distinguish between a "valid" return of + * FILE_TYPE_UNKNOWN and its return due to a calling error. In case of + * success, GetLastError() returns NO_ERROR. + */ + if (fileType == FILE_TYPE_UNKNOWN && lastError != NO_ERROR) + { + _dosmaperr(lastError); + return FILE_TYPE_UNKNOWN; + } + + return fileType; +} + +#endif /* WIN32 */ diff --git a/src/port/win32fseek.c b/src/port/win32fseek.c new file mode 100644 index 00000000000..985313c825f --- /dev/null +++ b/src/port/win32fseek.c @@ -0,0 +1,75 @@ +/*------------------------------------------------------------------------- + * + * win32fseek.c + * Replacements for fseeko() and ftello(). + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/port/win32fseek.c + * + *------------------------------------------------------------------------- + */ + +#ifdef FRONTEND +#include "postgres_fe.h" +#else +#include "postgres.h" +#endif + +#if defined(WIN32) && defined(_MSC_VER) + +/* + * _pgfseeko64 + * + * Calling fseek() on a handle to a non-seeking device such as a pipe or + * a communications device is not supported, and fseek() may not return + * an error. This wrapper relies on the file type to check which cases + * are supported. 
+ */ +int +_pgfseeko64(FILE *stream, pgoff_t offset, int origin) +{ + DWORD fileType; + HANDLE hFile = (HANDLE) _get_osfhandle(_fileno(stream)); + + fileType = pgwin32_get_file_type(hFile); + if (errno != 0) + return -1; + + if (fileType == FILE_TYPE_DISK) + return _fseeki64(stream, offset, origin); + else if (fileType == FILE_TYPE_CHAR || fileType == FILE_TYPE_PIPE) + errno = ESPIPE; + else + errno = EINVAL; + + return -1; +} + +/* + * _pgftello64 + * + * Same as _pgfseeko64(). + */ +pgoff_t +_pgftello64(FILE *stream) +{ + DWORD fileType; + HANDLE hFile = (HANDLE) _get_osfhandle(_fileno(stream)); + + fileType = pgwin32_get_file_type(hFile); + if (errno != 0) + return -1; + + if (fileType == FILE_TYPE_DISK) + return _ftelli64(stream); + else if (fileType == FILE_TYPE_CHAR || fileType == FILE_TYPE_PIPE) + errno = ESPIPE; + else + errno = EINVAL; + + return -1; +} + +#endif /* defined(WIN32) && defined(_MSC_VER) */ diff --git a/src/port/win32stat.c b/src/port/win32stat.c index 36c3b171f40..acbf4c7e279 100644 --- a/src/port/win32stat.c +++ b/src/port/win32stat.c @@ -290,33 +290,17 @@ _pgfstat64(int fileno, struct stat *buf) { HANDLE hFile = (HANDLE) _get_osfhandle(fileno); DWORD fileType = FILE_TYPE_UNKNOWN; - DWORD lastError; unsigned short st_mode; - /* - * When stdin, stdout, and stderr aren't associated with a stream the - * special value -2 is returned: - * https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/get-osfhandle - */ - if (hFile == INVALID_HANDLE_VALUE || hFile == (HANDLE) -2 || buf == NULL) + if (buf == NULL) { errno = EINVAL; return -1; } - fileType = GetFileType(hFile); - lastError = GetLastError(); - - /* - * Invoke GetLastError in order to distinguish between a "valid" return of - * FILE_TYPE_UNKNOWN and its return due to a calling error. In case of - * success, GetLastError returns NO_ERROR. - */ - if (fileType == FILE_TYPE_UNKNOWN && lastError != NO_ERROR) - { - _dosmaperr(lastError); + fileType = pgwin32_get_file_type(hFile); + if (errno != 0) return -1; - } switch (fileType) { diff --git a/src/tools/msvc/Mkvcbuild.pm b/src/tools/msvc/Mkvcbuild.pm index 3a9246e8578..735caa2b6f0 100644 --- a/src/tools/msvc/Mkvcbuild.pm +++ b/src/tools/msvc/Mkvcbuild.pm @@ -115,7 +115,8 @@ sub mkvcbuild pg_strong_random.c pgcheckdir.c pgmkdirp.c pgsleep.c pgstrcasecmp.c pqsignal.c mkdtemp.c qsort.c qsort_arg.c bsearch_arg.c quotes.c system.c strerror.c tar.c thread.c - win32env.c win32error.c win32security.c win32setlocale.c win32stat.c); + win32common.c win32env.c win32error.c win32fseek.c win32security.c + win32setlocale.c win32stat.c); push(@pgportfiles, 'strtof.c') if ($vsVersion < '14.00');
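For illustration, here is a hypothetical caller-side sketch (not part
of the patch): with the wrappers above in place, MSVC frontend code can
detect an unseekable stream the same way Unix code does, by checking
fseeko()'s result and errno, which is what the pg_dump/pg_restore logic
mentioned in the commit message depends on.

    #include <stdio.h>
    #include <errno.h>
    #include <stdbool.h>

    /* Hypothetical check: true if fp supports seeking. */
    static bool
    file_is_seekable(FILE *fp)
    {
        errno = 0;
        if (fseeko(fp, 0, SEEK_CUR) != 0)
        {
            /* On MSVC, pipes and terminals now fail with ESPIPE. */
            return false;
        }
        return true;
    }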