From 91af84594af299d0dc64c81ac640266187dcb84b Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Wed, 4 Feb 2009 20:15:44 +0200 Subject: [PATCH] Don't rename recovery.conf out of the way until the first checkpoint, like it was done in Simon's original patch. And fix some other comments pointed out by Simon. --- src/backend/access/transam/xlog.c | 89 +++++++++++++++++++------------ src/include/access/xlog.h | 6 +-- 2 files changed, 59 insertions(+), 36 deletions(-) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 79505a0145..961bcf3c43 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -1765,6 +1765,13 @@ XLogSetAsyncCommitLSN(XLogRecPtr asyncCommitLSN) SpinLockRelease(&xlogctl->info_lck); } +/* + * Advance minRecoveryPoint in control file. + * + * If we crash during recovery, we must reach this point again before + * the database is consistent. If minRecoveryPoint is already greater than + * or equal to 'lsn', it is not updated. + */ static void UpdateMinRecoveryPoint(XLogRecPtr lsn) { @@ -1772,14 +1779,6 @@ UpdateMinRecoveryPoint(XLogRecPtr lsn) if (!updateMinRecoveryPoint || XLByteLE(lsn, minRecoveryPoint)) return; - /* XXX - * Calculate and write out a new safeStartPoint. This defines - * the latest LSN that might appear on-disk while we apply - * the WAL records in this file. If we crash during recovery - * we must reach this point again before we can prove - * database consistency. Not a restartpoint! Restart points - * define where we should start recovery from, if we crash. - */ LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); /* update local copy */ @@ -1797,21 +1796,22 @@ UpdateMinRecoveryPoint(XLogRecPtr lsn) volatile XLogCtlData *xlogctl = XLogCtl; /* - * We need to update the control file. 
To avoid having to update it - * too often, we update it all the way to EndRecPtr, even though 'lsn' + * To avoid having to update the control file too often, we update + * it all the way to the last record being replayed, even though 'lsn' * would suffice for correctness. */ SpinLockAcquire(&xlogctl->info_lck); minRecoveryPoint = xlogctl->replayEndRecPtr; SpinLockRelease(&xlogctl->info_lck); + /* update control file */ ControlFile->minRecoveryPoint = minRecoveryPoint; UpdateControlFile(); + + elog(DEBUG2, "updated min recovery point to %X/%X", + minRecoveryPoint.xlogid, minRecoveryPoint.xrecoff); } LWLockRelease(ControlFileLock); - - elog(LOG, "updated min recovery point to %X/%X", - minRecoveryPoint.xlogid, minRecoveryPoint.xrecoff); } /* @@ -4835,18 +4835,13 @@ exitArchiveRecovery(TimeLineID endTLI, uint32 endLogId, uint32 endLogSeg) unlink(recoveryPath); /* ignore any error */ /* - * Rename the config file out of the way, so that we don't accidentally - * re-enter archive recovery mode in a subsequent crash. We have already - * restored all the WAL segments we need from the archive, and we trust - * that they are not going to go away even if we crash. (XXX: should - * we fsync() them all to ensure that?) + * As of 8.4 we no longer rename the recovery.conf file out of the + * way until after we have performed a full checkpoint. This ensures + * that any crash between now and the end of the checkpoint does not + * attempt to restart from a WAL file that is no longer available to us. + * As soon as we remove recovery.conf we lose our restore_command and + * cannot reaccess WAL files from the archive. 
*/ - unlink(RECOVERY_COMMAND_DONE); - if (rename(RECOVERY_COMMAND_FILE, RECOVERY_COMMAND_DONE) != 0) - ereport(FATAL, - (errcode_for_file_access(), - errmsg("could not rename file \"%s\" to \"%s\": %m", - RECOVERY_COMMAND_FILE, RECOVERY_COMMAND_DONE))); ereport(LOG, (errmsg("archive recovery complete"))); @@ -5279,16 +5274,23 @@ StartupXLOG(void) /* use volatile pointer to prevent code rearrangement */ volatile XLogCtlData *xlogctl = XLogCtl; - InRedo = true; - ereport(LOG, - (errmsg("redo starts at %X/%X", - ReadRecPtr.xlogid, ReadRecPtr.xrecoff))); - /* Update shared copy of replayEndRecPtr */ SpinLockAcquire(&xlogctl->info_lck); xlogctl->replayEndRecPtr = ReadRecPtr; SpinLockRelease(&xlogctl->info_lck); + InRedo = true; + + if (minRecoveryPoint.xlogid == 0 && minRecoveryPoint.xrecoff == 0) + ereport(LOG, + (errmsg("redo starts at %X/%X", + ReadRecPtr.xlogid, ReadRecPtr.xrecoff))); + else + ereport(LOG, + (errmsg("redo starts at %X/%X, consistency will be reached at %X/%X", + ReadRecPtr.xlogid, ReadRecPtr.xrecoff, + minRecoveryPoint.xlogid, minRecoveryPoint.xrecoff))); + /* * Let postmaster know we've started redo now. * @@ -5355,8 +5357,7 @@ StartupXLOG(void) if (InArchiveRecovery) { ereport(LOG, - (errmsg("consistent recovery state reached at %X/%X", - EndRecPtr.xlogid, EndRecPtr.xrecoff))); + (errmsg("consistent recovery state reached"))); if (IsUnderPostmaster) SendPostmasterSignal(PMSIGNAL_RECOVERY_CONSISTENT); } @@ -6029,6 +6030,7 @@ CreateCheckPoint(int flags) uint32 _logSeg; TransactionId *inCommitXids; int nInCommit; + bool leavingArchiveRecovery; /* shouldn't happen */ if (IsRecoveryProcessingMode()) @@ -6042,6 +6044,13 @@ CreateCheckPoint(int flags) */ LWLockAcquire(CheckpointLock, LW_EXCLUSIVE); + /* + * Find out if this is the first checkpoint after archive recovery. 
+ */ + LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); + leavingArchiveRecovery = (ControlFile->state == DB_IN_ARCHIVE_RECOVERY); + LWLockRelease(ControlFileLock); + /* * Prepare to accumulate statistics. * @@ -6286,6 +6295,21 @@ CreateCheckPoint(int flags) UpdateControlFile(); LWLockRelease(ControlFileLock); + if (leavingArchiveRecovery) + { + /* + * Rename the config file out of the way, so that we don't accidentally + * re-enter archive recovery mode in a subsequent crash. Prior to + * 8.4 this step was performed at end of exitArchiveRecovery(). + */ + unlink(RECOVERY_COMMAND_DONE); + if (rename(RECOVERY_COMMAND_FILE, RECOVERY_COMMAND_DONE) != 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not rename file \"%s\" to \"%s\": %m", + RECOVERY_COMMAND_FILE, RECOVERY_COMMAND_DONE))); + } + /* Update shared-memory copy of checkpoint XID/epoch */ { /* use volatile pointer to prevent code rearrangement */ @@ -6329,8 +6353,7 @@ CreateCheckPoint(int flags) * Truncate pg_subtrans if possible. We can throw away all data before * the oldest XMIN of any running transaction. No future transaction will * attempt to reference any pg_subtrans entry older than that (see Asserts - * in subtrans.c). During recovery, though, we mustn't do this because - * StartupSUBTRANS hasn't been called yet. + * in subtrans.c). 
*/ TruncateSUBTRANS(GetOldestXmin(true, false)); diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index c3b3ec7ee1..b97a6afbf0 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -168,9 +168,9 @@ extern bool XLOG_DEBUG; /* These directly affect the behavior of CreateCheckPoint and subsidiaries */ #define CHECKPOINT_IS_SHUTDOWN 0x0001 /* Checkpoint is for shutdown */ -#define CHECKPOINT_IMMEDIATE 0x0002 /* Do it without delays */ -#define CHECKPOINT_FORCE 0x0004 /* Force even if no activity */ -#define CHECKPOINT_STARTUP 0x0008 /* Startup checkpoint */ +#define CHECKPOINT_IS_STARTUP 0x0002 /* Startup checkpoint */ +#define CHECKPOINT_IMMEDIATE 0x0004 /* Do it without delays */ +#define CHECKPOINT_FORCE 0x0008 /* Force even if no activity */ /* These are important to RequestCheckpoint */ #define CHECKPOINT_WAIT 0x0010 /* Wait for completion */ /* These indicate the cause of a checkpoint request */ -- 2.30.2