Fix postmaster's behavior during smart shutdown.

author Tom Lane <[email protected]>
Fri, 14 Aug 2020 17:26:57 +0000 (13:26 -0400)
committer Tom Lane <[email protected]>
Fri, 14 Aug 2020 17:26:57 +0000 (13:26 -0400)
diff --git a/doc/src/sgml/ref/pg_ctl-ref.sgml b/doc/src/sgml/ref/pg_ctl-ref.sgml
index e31275a04e27f1bbcaf695a01e67db0bf68984fd..3946fa52eab7c5b305f79f243cd3c873a332c5df 100644 (file)
--- a/doc/src/sgml/ref/pg_ctl-ref.sgml
+++ b/doc/src/sgml/ref/pg_ctl-ref.sgml
@@ -185,8 +185,8 @@ PostgreSQL documentation
     <option>stop</option> mode shuts down the server that is running in
     the specified data directory.  Three different
     shutdown methods can be selected with the <option>-m</option>
-   option.  <quote>Smart</quote> mode waits for all active
-   clients to disconnect and any online backup to finish.
+   option.  <quote>Smart</quote> mode disallows new connections, then waits
+   for all existing clients to disconnect and any online backup to finish.
     If the server is in hot standby, recovery and  replication
     will be terminated once all clients have disconnected.
     <quote>Fast</quote> mode (the default) does not wait for clients to disconnect and
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index 38e2c16ac206eba5b5d4b8f38c31ac48eac5aa85..42223c0f61e20fcabc7e272299b0dd623061b34e 100644 (file)
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -148,8 +148,6 @@
  #define BACKEND_TYPE_BGWORKER  0x0008  /* bgworker process */
  #define BACKEND_TYPE_ALL       0x000F  /* OR of all the above */
  
-#define BACKEND_TYPE_WORKER        (BACKEND_TYPE_AUTOVAC | BACKEND_TYPE_BGWORKER)
-
  /*
   * List of active backends (or child processes anyway; we don't actually
   * know whether a given child has become a backend or is still in the
@@ -304,8 +302,7 @@ static bool FatalError = false; /* T if recovering from backend crash */
   * and we switch to PM_RUN state.
   *
   * Normal child backends can only be launched when we are in PM_RUN or
- * PM_HOT_STANDBY state.  (We also allow launch of normal
- * child backends in PM_WAIT_BACKUP state, but only for superusers.)
+ * PM_HOT_STANDBY state.  (connsAllowed can also restrict launching.)
   * In other states we handle connection requests by launching "dead_end"
   * child processes, which will simply send the client an error message and
   * quit.  (We track these in the BackendList so that we can know when they
@@ -319,10 +316,10 @@ static bool FatalError = false; /* T if recovering from backend crash */
   *
   * Notice that this state variable does not distinguish *why* we entered
   * states later than PM_RUN --- Shutdown and FatalError must be consulted
- * to find that out.  FatalError is never true in PM_RECOVERY_* or PM_RUN
- * states, nor in PM_SHUTDOWN states (because we don't enter those states
- * when trying to recover from a crash).  It can be true in PM_STARTUP state,
- * because we don't clear it until we've successfully started WAL redo.
+ * to find that out.  FatalError is never true in PM_RECOVERY, PM_HOT_STANDBY,
+ * or PM_RUN states, nor in PM_SHUTDOWN states (because we don't enter those
+ * states when trying to recover from a crash).  It can be true in PM_STARTUP
+ * state, because we don't clear it until we've successfully started WAL redo.
   */
  typedef enum
  {
@@ -331,8 +328,7 @@ typedef enum
     PM_RECOVERY,                /* in archive recovery mode */
     PM_HOT_STANDBY,             /* in hot standby mode */
     PM_RUN,                     /* normal "database is alive" state */
-   PM_WAIT_BACKUP,             /* waiting for online backup mode to end */
-   PM_WAIT_READONLY,           /* waiting for read only backends to exit */
+   PM_STOP_BACKENDS,           /* need to stop remaining backends */
     PM_WAIT_BACKENDS,           /* waiting for live backends to exit */
     PM_SHUTDOWN,                /* waiting for checkpointer to do shutdown
                                  * ckpt */
@@ -344,6 +340,21 @@ typedef enum
  
  static PMState pmState = PM_INIT;
  
+/*
+ * While performing a "smart shutdown", we restrict new connections but stay
+ * in PM_RUN or PM_HOT_STANDBY state until all the client backends are gone.
+ * connsAllowed is a sub-state indicator showing the active restriction.
+ * It is of no interest unless pmState is PM_RUN or PM_HOT_STANDBY.
+ */
+typedef enum
+{
+   ALLOW_ALL_CONNS,            /* normal not-shutting-down state */
+   ALLOW_SUPERUSER_CONNS,      /* only superusers can connect */
+   ALLOW_NO_CONNS              /* no new connections allowed, period */
+} ConnsAllowedState;
+
+static ConnsAllowedState connsAllowed = ALLOW_ALL_CONNS;
+
  /* Start time of SIGKILL timeout during immediate shutdown or child crash */
  /* Zero means timeout is not running */
  static time_t AbortStartTime = 0;
@@ -2323,7 +2334,7 @@ retry1:
                     (errcode(ERRCODE_TOO_MANY_CONNECTIONS),
                      errmsg("sorry, too many clients already")));
             break;
-       case CAC_WAITBACKUP:
+       case CAC_SUPERUSER:
             /* OK for now, will check in InitPostgres */
             break;
         case CAC_OK:
@@ -2443,31 +2454,36 @@ canAcceptConnections(int backend_type)
      * state.  We treat autovac workers the same as user backends for this
      * purpose.  However, bgworkers are excluded from this test; we expect
      * bgworker_should_start_now() decided whether the DB state allows them.
-    *
-    * In state PM_WAIT_BACKUP only superusers can connect (this must be
-    * allowed so that a superuser can end online backup mode); we return
-    * CAC_WAITBACKUP code to indicate that this must be checked later. Note
-    * that neither CAC_OK nor CAC_WAITBACKUP can safely be returned until we
-    * have checked for too many children.
      */
-   if (pmState != PM_RUN &&
+   if (pmState != PM_RUN && pmState != PM_HOT_STANDBY &&
         backend_type != BACKEND_TYPE_BGWORKER)
     {
-       if (pmState == PM_WAIT_BACKUP)
-           result = CAC_WAITBACKUP;    /* allow superusers only */
-       else if (Shutdown > NoShutdown)
+       if (Shutdown > NoShutdown)
             return CAC_SHUTDOWN;    /* shutdown is pending */
         else if (!FatalError &&
                  (pmState == PM_STARTUP ||
                   pmState == PM_RECOVERY))
             return CAC_STARTUP; /* normal startup */
-       else if (!FatalError &&
-                pmState == PM_HOT_STANDBY)
-           result = CAC_OK;    /* connection OK during hot standby */
         else
             return CAC_RECOVERY;    /* else must be crash recovery */
     }
  
+   /*
+    * "Smart shutdown" restrictions are applied only to normal connections,
+    * not to autovac workers or bgworkers.  When only superusers can connect,
+    * we return CAC_SUPERUSER to indicate that superuserness must be checked
+    * later.  Note that neither CAC_OK nor CAC_SUPERUSER can safely be
+    * returned until we have checked for too many children.
+    */
+   if (connsAllowed != ALLOW_ALL_CONNS &&
+       backend_type == BACKEND_TYPE_NORMAL)
+   {
+       if (connsAllowed == ALLOW_SUPERUSER_CONNS)
+           result = CAC_SUPERUSER; /* allow superusers only */
+       else
+           return CAC_SHUTDOWN;    /* shutdown is pending */
+   }
+
     /*
      * Don't start too many children.
      *
@@ -2793,34 +2809,22 @@ pmdie(SIGNAL_ARGS)
             sd_notify(0, "STOPPING=1");
  #endif
  
-           if (pmState == PM_RUN || pmState == PM_RECOVERY ||
-               pmState == PM_HOT_STANDBY || pmState == PM_STARTUP)
+           /*
+            * If we reached normal running, we have to wait for any online
+            * backup mode to end; otherwise go straight to waiting for client
+            * backends to exit.  (The difference is that in the former state,
+            * we'll still let in new superuser clients, so that somebody can
+            * end the online backup mode.)  If already in PM_STOP_BACKENDS or
+            * a later state, do not change it.
+            */
+           if (pmState == PM_RUN)
+               connsAllowed = ALLOW_SUPERUSER_CONNS;
+           else if (pmState == PM_HOT_STANDBY)
+               connsAllowed = ALLOW_NO_CONNS;
+           else if (pmState == PM_STARTUP || pmState == PM_RECOVERY)
             {
-               /* autovac workers are told to shut down immediately */
-               /* and bgworkers too; does this need tweaking? */
-               SignalSomeChildren(SIGTERM,
-                                  BACKEND_TYPE_AUTOVAC | BACKEND_TYPE_BGWORKER);
-               /* and the autovac launcher too */
-               if (AutoVacPID != 0)
-                   signal_child(AutoVacPID, SIGTERM);
-               /* and the bgwriter too */
-               if (BgWriterPID != 0)
-                   signal_child(BgWriterPID, SIGTERM);
-               /* and the walwriter too */
-               if (WalWriterPID != 0)
-                   signal_child(WalWriterPID, SIGTERM);
-
-               /*
-                * If we're in recovery, we can't kill the startup process
-                * right away, because at present doing so does not release
-                * its locks.  We might want to change this in a future
-                * release.  For the time being, the PM_WAIT_READONLY state
-                * indicates that we're waiting for the regular (read only)
-                * backends to die off; once they do, we'll kill the startup
-                * and walreceiver processes.
-                */
-               pmState = (pmState == PM_RUN) ?
-                   PM_WAIT_BACKUP : PM_WAIT_READONLY;
+               /* There should be no clients, so proceed to stop children */
+               pmState = PM_STOP_BACKENDS;
             }
  
             /*
@@ -2851,48 +2855,23 @@ pmdie(SIGNAL_ARGS)
             sd_notify(0, "STOPPING=1");
  #endif
  
-           if (StartupPID != 0)
-               signal_child(StartupPID, SIGTERM);
-           if (BgWriterPID != 0)
-               signal_child(BgWriterPID, SIGTERM);
-           if (WalReceiverPID != 0)
-               signal_child(WalReceiverPID, SIGTERM);
             if (pmState == PM_STARTUP || pmState == PM_RECOVERY)
             {
-               SignalSomeChildren(SIGTERM, BACKEND_TYPE_BGWORKER);
-
-               /*
-                * Only startup, bgwriter, walreceiver, possibly bgworkers,
-                * and/or checkpointer should be active in this state; we just
-                * signaled the first four, and we don't want to kill
-                * checkpointer yet.
-                */
-               pmState = PM_WAIT_BACKENDS;
+               /* Just shut down background processes silently */
+               pmState = PM_STOP_BACKENDS;
             }
             else if (pmState == PM_RUN ||
-                    pmState == PM_WAIT_BACKUP ||
-                    pmState == PM_WAIT_READONLY ||
-                    pmState == PM_WAIT_BACKENDS ||
                      pmState == PM_HOT_STANDBY)
             {
+               /* Report that we're about to zap live client sessions */
                 ereport(LOG,
                         (errmsg("aborting any active transactions")));
-               /* shut down all backends and workers */
-               SignalSomeChildren(SIGTERM,
-                                  BACKEND_TYPE_NORMAL | BACKEND_TYPE_AUTOVAC |
-                                  BACKEND_TYPE_BGWORKER);
-               /* and the autovac launcher too */
-               if (AutoVacPID != 0)
-                   signal_child(AutoVacPID, SIGTERM);
-               /* and the walwriter too */
-               if (WalWriterPID != 0)
-                   signal_child(WalWriterPID, SIGTERM);
-               pmState = PM_WAIT_BACKENDS;
+               pmState = PM_STOP_BACKENDS;
             }
  
             /*
-            * Now wait for backends to exit.  If there are none,
-            * PostmasterStateMachine will take the next step.
+            * PostmasterStateMachine will issue any necessary signals, or
+            * take the next step if no child processes need to be killed.
              */
             PostmasterStateMachine();
             break;
@@ -2987,7 +2966,7 @@ reaper(SIGNAL_ARGS)
                 ereport(LOG,
                         (errmsg("shutdown at recovery target")));
                 StartupStatus = STARTUP_NOT_RUNNING;
-               Shutdown = SmartShutdown;
+               Shutdown = Max(Shutdown, SmartShutdown);
                 TerminateChildren(SIGTERM);
                 pmState = PM_WAIT_BACKENDS;
                 /* PostmasterStateMachine logic does the rest */
@@ -3051,6 +3030,7 @@ reaper(SIGNAL_ARGS)
             AbortStartTime = 0;
             ReachedNormalRunning = true;
             pmState = PM_RUN;
+           connsAllowed = ALLOW_ALL_CONNS;
  
             /*
              * Crank up the background tasks, if we didn't do that already
@@ -3712,8 +3692,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
     if (pmState == PM_RECOVERY ||
         pmState == PM_HOT_STANDBY ||
         pmState == PM_RUN ||
-       pmState == PM_WAIT_BACKUP ||
-       pmState == PM_WAIT_READONLY ||
+       pmState == PM_STOP_BACKENDS ||
         pmState == PM_SHUTDOWN)
         pmState = PM_WAIT_BACKENDS;
  
@@ -3796,35 +3775,60 @@ LogChildExit(int lev, const char *procname, int pid, int exitstatus)
  static void
  PostmasterStateMachine(void)
  {
-   if (pmState == PM_WAIT_BACKUP)
+   /* If we're doing a smart shutdown, try to advance that state. */
+   if (pmState == PM_RUN || pmState == PM_HOT_STANDBY)
     {
-       /*
-        * PM_WAIT_BACKUP state ends when online backup mode is not active.
-        */
-       if (!BackupInProgress())
-           pmState = PM_WAIT_BACKENDS;
-   }
+       if (connsAllowed == ALLOW_SUPERUSER_CONNS)
+       {
+           /*
+            * ALLOW_SUPERUSER_CONNS state ends as soon as online backup mode
+            * is not active.
+            */
+           if (!BackupInProgress())
+               connsAllowed = ALLOW_NO_CONNS;
+       }
  
-   if (pmState == PM_WAIT_READONLY)
-   {
-       /*
-        * PM_WAIT_READONLY state ends when we have no regular backends that
-        * have been started during recovery.  We kill the startup and
-        * walreceiver processes and transition to PM_WAIT_BACKENDS.  Ideally,
-        * we might like to kill these processes first and then wait for
-        * backends to die off, but that doesn't work at present because
-        * killing the startup process doesn't release its locks.
-        */
-       if (CountChildren(BACKEND_TYPE_NORMAL) == 0)
+       if (connsAllowed == ALLOW_NO_CONNS)
         {
-           if (StartupPID != 0)
-               signal_child(StartupPID, SIGTERM);
-           if (WalReceiverPID != 0)
-               signal_child(WalReceiverPID, SIGTERM);
-           pmState = PM_WAIT_BACKENDS;
+           /*
+            * ALLOW_NO_CONNS state ends when we have no normal client
+            * backends running.  Then we're ready to stop other children.
+            */
+           if (CountChildren(BACKEND_TYPE_NORMAL) == 0)
+               pmState = PM_STOP_BACKENDS;
         }
     }
  
+   /*
+    * If we're ready to do so, signal child processes to shut down.  (This
+    * isn't a persistent state, but treating it as a distinct pmState allows
+    * us to share this code across multiple shutdown code paths.)
+    */
+   if (pmState == PM_STOP_BACKENDS)
+   {
+       /* Signal all backend children except walsenders */
+       SignalSomeChildren(SIGTERM,
+                          BACKEND_TYPE_ALL - BACKEND_TYPE_WALSND);
+       /* and the autovac launcher too */
+       if (AutoVacPID != 0)
+           signal_child(AutoVacPID, SIGTERM);
+       /* and the bgwriter too */
+       if (BgWriterPID != 0)
+           signal_child(BgWriterPID, SIGTERM);
+       /* and the walwriter too */
+       if (WalWriterPID != 0)
+           signal_child(WalWriterPID, SIGTERM);
+       /* If we're in recovery, also stop startup and walreceiver procs */
+       if (StartupPID != 0)
+           signal_child(StartupPID, SIGTERM);
+       if (WalReceiverPID != 0)
+           signal_child(WalReceiverPID, SIGTERM);
+       /* checkpointer, archiver, stats, and syslogger may continue for now */
+
+       /* Now transition to PM_WAIT_BACKENDS state to wait for them to die */
+       pmState = PM_WAIT_BACKENDS;
+   }
+
     /*
      * If we are in a state-machine state that implies waiting for backends to
      * exit, see if they're all gone, and change state if so.
@@ -3843,7 +3847,7 @@ PostmasterStateMachine(void)
          * later after writing the checkpoint record, like the archiver
          * process.
          */
-       if (CountChildren(BACKEND_TYPE_NORMAL | BACKEND_TYPE_WORKER) == 0 &&
+       if (CountChildren(BACKEND_TYPE_ALL - BACKEND_TYPE_WALSND) == 0 &&
             StartupPID == 0 &&
             WalReceiverPID == 0 &&
             BgWriterPID == 0 &&
@@ -4184,7 +4188,7 @@ BackendStartup(Port *port)
     /* Pass down canAcceptConnections state */
     port->canAcceptConnections = canAcceptConnections(BACKEND_TYPE_NORMAL);
     bn->dead_end = (port->canAcceptConnections != CAC_OK &&
-                   port->canAcceptConnections != CAC_WAITBACKUP);
+                   port->canAcceptConnections != CAC_SUPERUSER);
  
     /*
      * Unless it's a dead_end child, assign it a child slot number
@@ -5255,6 +5259,8 @@ sigusr1_handler(SIGNAL_ARGS)
  #endif
  
         pmState = PM_HOT_STANDBY;
+       connsAllowed = ALLOW_ALL_CONNS;
+
         /* Some workers may be scheduled to start now */
         StartWorkerNeeded = true;
     }
@@ -5287,7 +5293,7 @@ sigusr1_handler(SIGNAL_ARGS)
     }
  
     if (CheckPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER) &&
-       Shutdown == NoShutdown)
+       Shutdown <= SmartShutdown && pmState < PM_STOP_BACKENDS)
     {
         /*
          * Start one iteration of the autovacuum daemon, even if autovacuuming
@@ -5302,7 +5308,7 @@ sigusr1_handler(SIGNAL_ARGS)
     }
  
     if (CheckPostmasterSignal(PMSIGNAL_START_AUTOVAC_WORKER) &&
-       Shutdown == NoShutdown)
+       Shutdown <= SmartShutdown && pmState < PM_STOP_BACKENDS)
     {
         /* The autovacuum launcher wants us to start a worker process. */
         StartAutovacuumWorker();
@@ -5333,7 +5339,7 @@ sigusr1_handler(SIGNAL_ARGS)
  
     if (StartupPID != 0 &&
         (pmState == PM_STARTUP || pmState == PM_RECOVERY ||
-        pmState == PM_HOT_STANDBY || pmState == PM_WAIT_READONLY) &&
+        pmState == PM_HOT_STANDBY) &&
         CheckPromoteSignal())
     {
         /*
@@ -5651,8 +5657,8 @@ MaybeStartWalReceiver(void)
  {
     if (WalReceiverPID == 0 &&
         (pmState == PM_STARTUP || pmState == PM_RECOVERY ||
-        pmState == PM_HOT_STANDBY || pmState == PM_WAIT_READONLY) &&
-       Shutdown == NoShutdown)
+        pmState == PM_HOT_STANDBY) &&
+       Shutdown <= SmartShutdown)
     {
         WalReceiverPID = StartWalReceiver();
         if (WalReceiverPID != 0)
@@ -5905,8 +5911,7 @@ bgworker_should_start_now(BgWorkerStartTime start_time)
         case PM_SHUTDOWN_2:
         case PM_SHUTDOWN:
         case PM_WAIT_BACKENDS:
-       case PM_WAIT_READONLY:
-       case PM_WAIT_BACKUP:
+       case PM_STOP_BACKENDS:
             break;
  
         case PM_RUN:
diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c
index 893be2f3ddbfe350a7c7a63370623acd7253bb1f..d4ab4c7e233319f3f14bab04fb6db7aa0ce20556 100644 (file)
--- a/src/backend/utils/init/postinit.c
+++ b/src/backend/utils/init/postinit.c
@@ -795,7 +795,7 @@ InitPostgres(const char *in_dbname, Oid dboid, const char *username,
      */
     if ((!am_superuser || am_walsender) &&
         MyProcPort != NULL &&
-       MyProcPort->canAcceptConnections == CAC_WAITBACKUP)
+       MyProcPort->canAcceptConnections == CAC_SUPERUSER)
     {
         if (am_walsender)
             ereport(FATAL,
diff --git a/src/include/libpq/libpq-be.h b/src/include/libpq/libpq-be.h
index 179ebaa104b328ba8d3f4979f999a8a24697e23a..0a23281ad59b511d6cbdddd138e8513d58c2d8e4 100644 (file)
--- a/src/include/libpq/libpq-be.h
+++ b/src/include/libpq/libpq-be.h
@@ -71,7 +71,7 @@ typedef struct
  typedef enum CAC_state
  {
     CAC_OK, CAC_STARTUP, CAC_SHUTDOWN, CAC_RECOVERY, CAC_TOOMANY,
-   CAC_WAITBACKUP
+   CAC_SUPERUSER
  } CAC_state;
author	Tom Lane <[email protected]>
	Fri, 14 Aug 2020 17:26:57 +0000 (13:26 -0400)
committer	Tom Lane <[email protected]>
	Fri, 14 Aug 2020 17:26:57 +0000 (13:26 -0400)
doc/src/sgml/ref/pg_ctl-ref.sgml		\| blob \| blame \| history
src/backend/postmaster/postmaster.c		\| blob \| blame \| history
src/backend/utils/init/postinit.c		\| blob \| blame \| history
src/include/libpq/libpq-be.h		\| blob \| blame \| history