Use condition variables to wait for checkpoints.
author: Thomas Munro <[email protected]>
Wed, 13 Mar 2019 21:25:27 +0000 (10:25 +1300)
committer: Thomas Munro <[email protected]>
Wed, 13 Mar 2019 21:59:33 +0000 (10:59 +1300)
Previously we used a polling/sleeping loop to wait for checkpoints
to begin and end, which leads to up to a couple hundred milliseconds
of needless thumb-twiddling.  Use condition variables instead.

Author: Thomas Munro
Reviewed-by: Andres Freund
Discussion: https://postgr.es/m/CA%2BhUKGLY7sDe%2Bbg1K%3DbnEzOofGoo4bJHYh9%2BcDCXJepb6DQmLw%40mail.gmail.com

doc/src/sgml/monitoring.sgml
src/backend/postmaster/checkpointer.c
src/backend/postmaster/pgstat.c
src/include/pgstat.h

index e2630fd3682ab8bb99f0ced32b05d17e65b34976..60b89356f709981827d86cce8dc7379aebc3d821 100644 (file)
@@ -1281,7 +1281,7 @@ postgres   27093  0.0  0.0  30096  2752 ?        Ss   11:34   0:00 postgres: ser
          <entry>Waiting in an extension.</entry>
         </row>
         <row>
-         <entry morerows="34"><literal>IPC</literal></entry>
+         <entry morerows="36"><literal>IPC</literal></entry>
          <entry><literal>BgWorkerShutdown</literal></entry>
          <entry>Waiting for background worker to shut down.</entry>
         </row>
@@ -1293,6 +1293,14 @@ postgres   27093  0.0  0.0  30096  2752 ?        Ss   11:34   0:00 postgres: ser
          <entry><literal>BtreePage</literal></entry>
          <entry>Waiting for the page number needed to continue a parallel B-tree scan to become available.</entry>
         </row>
+        <row>
+         <entry><literal>CheckpointDone</literal></entry>
+         <entry>Waiting for a checkpoint to complete.</entry>
+        </row>
+        <row>
+         <entry><literal>CheckpointStart</literal></entry>
+         <entry>Waiting for a checkpoint to start.</entry>
+        </row>
         <row>
          <entry><literal>ClogGroupUpdate</literal></entry>
          <entry>Waiting for group leader to update transaction status at transaction end.</entry>
index fe96c41359b70b6f2a82196c41eea7e33b5478fb..3d5b382d048d778a3d4549669a9a1be851493981 100644 (file)
@@ -126,6 +126,9 @@ typedef struct
 
    int         ckpt_flags;     /* checkpoint flags, as defined in xlog.h */
 
+   ConditionVariable start_cv; /* signaled when ckpt_started advances */
+   ConditionVariable done_cv;  /* signaled when ckpt_done advances */
+
    uint32      num_backend_writes; /* counts user backend buffer writes */
    uint32      num_backend_fsync;  /* counts user backend fsync calls */
 
@@ -428,6 +431,8 @@ CheckpointerMain(void)
            CheckpointerShmem->ckpt_started++;
            SpinLockRelease(&CheckpointerShmem->ckpt_lck);
 
+           ConditionVariableBroadcast(&CheckpointerShmem->start_cv);
+
            /*
             * The end-of-recovery checkpoint is a real checkpoint that's
             * performed while we're still in recovery.
@@ -488,6 +493,8 @@ CheckpointerMain(void)
            CheckpointerShmem->ckpt_done = CheckpointerShmem->ckpt_started;
            SpinLockRelease(&CheckpointerShmem->ckpt_lck);
 
+           ConditionVariableBroadcast(&CheckpointerShmem->done_cv);
+
            if (ckpt_performed)
            {
                /*
@@ -915,6 +922,8 @@ CheckpointerShmemInit(void)
        MemSet(CheckpointerShmem, 0, size);
        SpinLockInit(&CheckpointerShmem->ckpt_lck);
        CheckpointerShmem->max_requests = NBuffers;
+       ConditionVariableInit(&CheckpointerShmem->start_cv);
+       ConditionVariableInit(&CheckpointerShmem->done_cv);
    }
 }
 
@@ -1023,6 +1032,7 @@ RequestCheckpoint(int flags)
                    new_failed;
 
        /* Wait for a new checkpoint to start. */
+       ConditionVariablePrepareToSleep(&CheckpointerShmem->start_cv);
        for (;;)
        {
            SpinLockAcquire(&CheckpointerShmem->ckpt_lck);
@@ -1032,13 +1042,15 @@ RequestCheckpoint(int flags)
            if (new_started != old_started)
                break;
 
-           CHECK_FOR_INTERRUPTS();
-           pg_usleep(100000L);
+           ConditionVariableSleep(&CheckpointerShmem->start_cv,
+                                  WAIT_EVENT_CHECKPOINT_START);
        }
+       ConditionVariableCancelSleep();
 
        /*
         * We are waiting for ckpt_done >= new_started, in a modulo sense.
         */
+       ConditionVariablePrepareToSleep(&CheckpointerShmem->done_cv);
        for (;;)
        {
            int         new_done;
@@ -1051,9 +1063,10 @@ RequestCheckpoint(int flags)
            if (new_done - new_started >= 0)
                break;
 
-           CHECK_FOR_INTERRUPTS();
-           pg_usleep(100000L);
+           ConditionVariableSleep(&CheckpointerShmem->done_cv,
+                                  WAIT_EVENT_CHECKPOINT_DONE);
        }
+       ConditionVariableCancelSleep();
 
        if (new_failed != old_failed)
            ereport(ERROR,
index ba31f532ea4e00e3f1e20a4901c873b061a83cd9..2fbfadd9f0c755cb623e04010dcf2145c6048457 100644 (file)
@@ -3623,6 +3623,12 @@ pgstat_get_wait_ipc(WaitEventIPC w)
        case WAIT_EVENT_BTREE_PAGE:
            event_name = "BtreePage";
            break;
+       case WAIT_EVENT_CHECKPOINT_DONE:
+           event_name = "CheckpointDone";
+           break;
+       case WAIT_EVENT_CHECKPOINT_START:
+           event_name = "CheckpointStart";
+           break;
        case WAIT_EVENT_CLOG_GROUP_UPDATE:
            event_name = "ClogGroupUpdate";
            break;
index 725c8b0d64a9515342f653071530fea54b2fb0fa..ea6cc8b560f221b8e3314dfb4bc5bf8441006bc7 100644 (file)
@@ -817,6 +817,8 @@ typedef enum
    WAIT_EVENT_BGWORKER_STARTUP,
    WAIT_EVENT_BTREE_PAGE,
    WAIT_EVENT_CLOG_GROUP_UPDATE,
+   WAIT_EVENT_CHECKPOINT_DONE,
+   WAIT_EVENT_CHECKPOINT_START,
    WAIT_EVENT_EXECUTE_GATHER,
    WAIT_EVENT_HASH_BATCH_ALLOCATING,
    WAIT_EVENT_HASH_BATCH_ELECTING,