* one is as a means of determining the set of currently running transactions.
*
* Because of various subtle race conditions it is critical that a backend
- * hold the correct locks while setting or clearing its MyPgXact->xid field.
- * See notes in src/backend/access/transam/README.
+ * hold the correct locks while setting or clearing its xid (in
+ * ProcGlobal->xids[]/MyProc->xid). See notes in
+ * src/backend/access/transam/README.
*
* The process arrays now also include structures representing prepared
* transactions. The xid and subxids fields of these are valid, as are the
ProcArrayStruct *arrayP = procArray;
int index;
+ /* See ProcGlobal comment explaining why both locks are held */
LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
+ LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
if (arrayP->numProcs >= arrayP->maxProcs)
{
* fixed supply of PGPROC structs too, and so we should have failed
* earlier.)
*/
- LWLockRelease(ProcArrayLock);
ereport(FATAL,
(errcode(ERRCODE_TOO_MANY_CONNECTIONS),
errmsg("sorry, too many clients already")));
}
memmove(&arrayP->pgprocnos[index + 1], &arrayP->pgprocnos[index],
- (arrayP->numProcs - index) * sizeof(int));
+ (arrayP->numProcs - index) * sizeof(*arrayP->pgprocnos));
+ memmove(&ProcGlobal->xids[index + 1], &ProcGlobal->xids[index],
+ (arrayP->numProcs - index) * sizeof(*ProcGlobal->xids));
+
arrayP->pgprocnos[index] = proc->pgprocno;
+ ProcGlobal->xids[index] = proc->xid;
+
arrayP->numProcs++;
+ for (; index < arrayP->numProcs; index++)
+ {
+ allProcs[arrayP->pgprocnos[index]].pgxactoff = index;
+ }
+
+ /*
+ * Release in reversed acquisition order, to reduce frequency of having to
+ * wait for XidGenLock while holding ProcArrayLock.
+ */
+ LWLockRelease(XidGenLock);
LWLockRelease(ProcArrayLock);
}
DisplayXidCache();
#endif
+ /* See ProcGlobal comment explaining why both locks are held */
LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
+ LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
+
+ Assert(ProcGlobal->allProcs[arrayP->pgprocnos[proc->pgxactoff]].pgxactoff == proc->pgxactoff);
if (TransactionIdIsValid(latestXid))
{
- Assert(TransactionIdIsValid(allPgXact[proc->pgprocno].xid));
+ Assert(TransactionIdIsValid(ProcGlobal->xids[proc->pgxactoff]));
/* Advance global latestCompletedXid while holding the lock */
MaintainLatestCompletedXid(latestXid);
+
+ /* Reset the dense-array copy of the xid, using the symbolic constant */
+ ProcGlobal->xids[proc->pgxactoff] = InvalidTransactionId;
}
else
{
/* Shouldn't be trying to remove a live transaction here */
- Assert(!TransactionIdIsValid(allPgXact[proc->pgprocno].xid));
+ Assert(!TransactionIdIsValid(ProcGlobal->xids[proc->pgxactoff]));
}
+ /*
+ * On either path the dense-array xid must be invalid by now. Test the xid
+ * itself rather than the boolean result of comparing it with zero.
+ */
+ Assert(!TransactionIdIsValid(ProcGlobal->xids[proc->pgxactoff]));
+
for (index = 0; index < arrayP->numProcs; index++)
{
if (arrayP->pgprocnos[index] == proc->pgprocno)
{
/* Keep the PGPROC array sorted. See notes above */
memmove(&arrayP->pgprocnos[index], &arrayP->pgprocnos[index + 1],
- (arrayP->numProcs - index - 1) * sizeof(int));
+ (arrayP->numProcs - index - 1) * sizeof(*arrayP->pgprocnos));
+ memmove(&ProcGlobal->xids[index], &ProcGlobal->xids[index + 1],
+ (arrayP->numProcs - index - 1) * sizeof(*ProcGlobal->xids));
+
arrayP->pgprocnos[arrayP->numProcs - 1] = -1; /* for debugging */
arrayP->numProcs--;
+
+ /* adjust for removed PGPROC */
+ for (; index < arrayP->numProcs; index++)
+ allProcs[arrayP->pgprocnos[index]].pgxactoff--;
+
+ /*
+ * Release in reversed acquisition order, to reduce frequency of
+ * having to wait for XidGenLock while holding ProcArrayLock.
+ */
+ LWLockRelease(XidGenLock);
LWLockRelease(ProcArrayLock);
return;
}
}
/* Oops */
+ LWLockRelease(XidGenLock);
LWLockRelease(ProcArrayLock);
elog(LOG, "failed to find proc %p in ProcArray", proc);
* else is taking a snapshot. See discussion in
* src/backend/access/transam/README.
*/
- Assert(TransactionIdIsValid(allPgXact[proc->pgprocno].xid));
+ Assert(TransactionIdIsValid(proc->xid));
/*
* If we can immediately acquire ProcArrayLock, we clear our own XID
* anyone else's calculation of a snapshot. We might change their
* estimate of global xmin, but that's OK.
*/
- Assert(!TransactionIdIsValid(allPgXact[proc->pgprocno].xid));
+ Assert(!TransactionIdIsValid(proc->xid));
proc->lxid = InvalidLocalTransactionId;
/* must be cleared with xid/xmin: */
ProcArrayEndTransactionInternal(PGPROC *proc, PGXACT *pgxact,
TransactionId latestXid)
{
- pgxact->xid = InvalidTransactionId;
+ size_t pgxactoff = proc->pgxactoff;
+
+ Assert(TransactionIdIsValid(ProcGlobal->xids[pgxactoff]));
+ Assert(ProcGlobal->xids[pgxactoff] == proc->xid);
+
+ ProcGlobal->xids[pgxactoff] = InvalidTransactionId;
+ proc->xid = InvalidTransactionId;
proc->lxid = InvalidLocalTransactionId;
/* must be cleared with xid/xmin: */
pgxact->vacuumFlags &= ~PROC_VACUUM_STATE_MASK;
uint32 wakeidx;
/* We should definitely have an XID to clear. */
- Assert(TransactionIdIsValid(allPgXact[proc->pgprocno].xid));
+ Assert(TransactionIdIsValid(proc->xid));
/* Add ourselves to the list of processes needing a group XID clear. */
proc->procArrayGroupMember = true;
* This is used after successfully preparing a 2-phase transaction. We are
* not actually reporting the transaction's XID as no longer running --- it
* will still appear as running because the 2PC's gxact is in the ProcArray
- * too. We just have to clear out our own PGXACT.
+ * too. We just have to clear out our own PGPROC.
*/
void
ProcArrayClearTransaction(PGPROC *proc)
{
PGXACT *pgxact = &allPgXact[proc->pgprocno];
+ size_t pgxactoff;
/*
- * We can skip locking ProcArrayLock here, because this action does not
- * actually change anyone's view of the set of running XIDs: our entry is
- * duplicate with the gxact that has already been inserted into the
- * ProcArray.
+ * We can skip locking ProcArrayLock exclusively here, because this action
+ * does not actually change anyone's view of the set of running XIDs: our
+ * entry duplicates the gxact that has already been inserted into the
+ * ProcArray. We still need the lock in shared mode so that proc->pgxactoff
+ * cannot change while we use it to index ProcGlobal->xids[]. Both the
+ * dense-array entry and proc->xid are cleared below, so the two copies of
+ * the xid stay coherent.
*/
- pgxact->xid = InvalidTransactionId;
+ LWLockAcquire(ProcArrayLock, LW_SHARED);
+
+ pgxactoff = proc->pgxactoff;
+
+ ProcGlobal->xids[pgxactoff] = InvalidTransactionId;
+ proc->xid = InvalidTransactionId;
+
proc->lxid = InvalidLocalTransactionId;
proc->xmin = InvalidTransactionId;
proc->recoveryConflictPending = false;
/* Clear the subtransaction-XID cache too */
pgxact->nxids = 0;
pgxact->overflowed = false;
+
+ LWLockRelease(ProcArrayLock);
}
/*
* there are four possibilities for finding a running transaction:
*
* 1. The given Xid is a main transaction Id. We will find this out cheaply
- * by looking at the PGXACT struct for each backend.
+ * by looking at ProcGlobal->xids.
*
* 2. The given Xid is one of the cached subxact Xids in the PGPROC array.
* We can find this out cheaply too.
* if the Xid is running on the primary.
*
* 4. Search the SubTrans tree to find the Xid's topmost parent, and then see
- * if that is running according to PGXACT or KnownAssignedXids. This is the
- * slowest way, but sadly it has to be done always if the others failed,
- * unless we see that the cached subxact sets are complete (none have
+ * if that is running according to ProcGlobal->xids[] or KnownAssignedXids.
+ * This is the slowest way, but sadly it has to be done always if the others
+ * failed, unless we see that the cached subxact sets are complete (none have
* overflowed).
*
* ProcArrayLock has to be held while we do 1, 2, 3. If we save the top Xids
* while doing 1 and 3, we can release the ProcArrayLock while we do 4.
* This buys back some concurrency (and we can't retrieve the main Xids from
- * PGXACT again anyway; see GetNewTransactionId).
+ * ProcGlobal->xids[] again anyway; see GetNewTransactionId).
*/
bool
TransactionIdIsInProgress(TransactionId xid)
{
static TransactionId *xids = NULL;
+ static TransactionId *other_xids;
int nxids = 0;
ProcArrayStruct *arrayP = procArray;
TransactionId topxid;
TransactionId latestCompletedXid;
- int i,
- j;
+ int mypgxactoff;
+ size_t numProcs;
+ int j;
/*
* Don't bother checking a transaction older than RecentXmin; it could not
errmsg("out of memory")));
}
+ other_xids = ProcGlobal->xids;
+
LWLockAcquire(ProcArrayLock, LW_SHARED);
/*
}
/* No shortcuts, gotta grovel through the array */
- for (i = 0; i < arrayP->numProcs; i++)
+ mypgxactoff = MyProc->pgxactoff;
+ numProcs = arrayP->numProcs;
+ for (size_t pgxactoff = 0; pgxactoff < numProcs; pgxactoff++)
{
- int pgprocno = arrayP->pgprocnos[i];
- PGPROC *proc = &allProcs[pgprocno];
- PGXACT *pgxact = &allPgXact[pgprocno];
+ int pgprocno;
+ PGXACT *pgxact;
+ PGPROC *proc;
TransactionId pxid;
int pxids;
- /* Ignore my own proc --- dealt with it above */
- if (proc == MyProc)
+ /* Ignore ourselves --- dealt with it above */
+ if (pgxactoff == mypgxactoff)
continue;
/* Fetch xid just once - see GetNewTransactionId */
- pxid = UINT32_ACCESS_ONCE(pgxact->xid);
+ pxid = UINT32_ACCESS_ONCE(other_xids[pgxactoff]);
if (!TransactionIdIsValid(pxid))
continue;
/*
* Step 2: check the cached child-Xids arrays
*/
+ pgprocno = arrayP->pgprocnos[pgxactoff];
+ pgxact = &allPgXact[pgprocno];
pxids = pgxact->nxids;
pg_read_barrier(); /* pairs with barrier in GetNewTransactionId() */
+ pgprocno = arrayP->pgprocnos[pgxactoff];
+ proc = &allProcs[pgprocno];
for (j = pxids - 1; j >= 0; j--)
{
/* Fetch xid just once - see GetNewTransactionId */
*/
if (RecoveryInProgress())
{
- /* none of the PGXACT entries should have XIDs in hot standby mode */
+ /* none of the PGPROC entries should have XIDs in hot standby mode */
Assert(nxids == 0);
if (KnownAssignedXidExists(xid))
Assert(TransactionIdIsValid(topxid));
if (!TransactionIdEquals(topxid, xid))
{
- for (i = 0; i < nxids; i++)
+ for (int i = 0; i < nxids; i++)
{
if (TransactionIdEquals(xids[i], topxid))
return true;
{
bool result = false;
ProcArrayStruct *arrayP = procArray;
+ TransactionId *other_xids = ProcGlobal->xids;
int i;
/*
{
int pgprocno = arrayP->pgprocnos[i];
PGPROC *proc = &allProcs[pgprocno];
- PGXACT *pgxact = &allPgXact[pgprocno];
TransactionId pxid;
/* Fetch xid just once - see GetNewTransactionId */
- pxid = UINT32_ACCESS_ONCE(pgxact->xid);
+ pxid = UINT32_ACCESS_ONCE(other_xids[i]);
if (!TransactionIdIsValid(pxid))
continue;
ProcArrayStruct *arrayP = procArray;
TransactionId kaxmin;
bool in_recovery = RecoveryInProgress();
+ TransactionId *other_xids = ProcGlobal->xids;
/* inferred after ProcArrayLock is released */
h->catalog_oldest_nonremovable = InvalidTransactionId;
TransactionId xmin;
/* Fetch xid just once - see GetNewTransactionId */
- xid = UINT32_ACCESS_ONCE(pgxact->xid);
+ /*
+ * ProcGlobal->xids[] is a dense array indexed by PGPROC->pgxactoff, not
+ * by pgprocno; indexing it with pgprocno would read some other backend's
+ * xid (or an unused slot). NOTE(review): the enclosing loop's procarray
+ * index should be equivalent while ProcArrayLock is held - confirm.
+ */
+ xid = UINT32_ACCESS_ONCE(other_xids[proc->pgxactoff]);
xmin = UINT32_ACCESS_ONCE(proc->xmin);
/*
GetSnapshotData(Snapshot snapshot)
{
ProcArrayStruct *arrayP = procArray;
+ TransactionId *other_xids = ProcGlobal->xids;
TransactionId xmin;
TransactionId xmax;
- int index;
- int count = 0;
+ size_t count = 0;
int subcount = 0;
bool suboverflowed = false;
FullTransactionId latest_completed;
TransactionId oldestxid;
+ int mypgxactoff;
+ TransactionId myxid;
+
TransactionId replication_slot_xmin = InvalidTransactionId;
TransactionId replication_slot_catalog_xmin = InvalidTransactionId;
LWLockAcquire(ProcArrayLock, LW_SHARED);
latest_completed = ShmemVariableCache->latestCompletedXid;
+ mypgxactoff = MyProc->pgxactoff;
+ myxid = other_xids[mypgxactoff];
+ Assert(myxid == MyProc->xid);
+
oldestxid = ShmemVariableCache->oldestXid;
/* xmax is always latestCompletedXid + 1 */
/* initialize xmin calculation with xmax */
xmin = xmax;
+ /* take own xid into account, saves a check inside the loop */
+ if (TransactionIdIsNormal(myxid) && NormalTransactionIdPrecedes(myxid, xmin))
+ xmin = myxid;
+
snapshot->takenDuringRecovery = RecoveryInProgress();
if (!snapshot->takenDuringRecovery)
{
+ size_t numProcs = arrayP->numProcs;
+ TransactionId *xip = snapshot->xip;
int *pgprocnos = arrayP->pgprocnos;
- int numProcs;
/*
- * Spin over procArray checking xid, xmin, and subxids. The goal is
- * to gather all active xids, find the lowest xmin, and try to record
- * subxids.
+ * First collect set of pgxactoff/xids that need to be included in the
+ * snapshot.
*/
- numProcs = arrayP->numProcs;
- for (index = 0; index < numProcs; index++)
+ for (size_t pgxactoff = 0; pgxactoff < numProcs; pgxactoff++)
{
- int pgprocno = pgprocnos[index];
- PGXACT *pgxact = &allPgXact[pgprocno];
- TransactionId xid;
+ /* Fetch xid just once - see GetNewTransactionId */
+ TransactionId xid = UINT32_ACCESS_ONCE(other_xids[pgxactoff]);
+ int pgprocno;
+ PGXACT *pgxact;
+ uint8 vacuumFlags;
+
+ Assert(allProcs[arrayP->pgprocnos[pgxactoff]].pgxactoff == pgxactoff);
/*
- * Skip over backends doing logical decoding which manages xmin
- * separately (check below) and ones running LAZY VACUUM.
+ * If the transaction has no XID assigned, we can skip it; it
+ * won't have sub-XIDs either.
*/
- if (pgxact->vacuumFlags &
- (PROC_IN_LOGICAL_DECODING | PROC_IN_VACUUM))
+ if (likely(xid == InvalidTransactionId))
continue;
- /* Fetch xid just once - see GetNewTransactionId */
- xid = UINT32_ACCESS_ONCE(pgxact->xid);
+ /*
+ * We don't include our own XID (if any) in the snapshot. It
+ * needs to be included in the xmin computation, but we already
+ * did so outside the loop.
+ */
+ if (pgxactoff == mypgxactoff)
+ continue;
/*
- * If the transaction has no XID assigned, we can skip it; it
- * won't have sub-XIDs either. If the XID is >= xmax, we can also
- * skip it; such transactions will be treated as running anyway
- * (and any sub-XIDs will also be >= xmax).
+ * The only way we are able to get here with a non-normal xid
+ * is during bootstrap - with this backend using
+ * BootstrapTransactionId. But the above test should filter
+ * that out.
*/
- if (!TransactionIdIsNormal(xid)
- || !NormalTransactionIdPrecedes(xid, xmax))
+ Assert(TransactionIdIsNormal(xid));
+
+ /*
+ * If the XID is >= xmax, we can skip it; such transactions will
+ * be treated as running anyway (and any sub-XIDs will also be >=
+ * xmax).
+ */
+ if (!NormalTransactionIdPrecedes(xid, xmax))
continue;
+ pgprocno = pgprocnos[pgxactoff];
+ pgxact = &allPgXact[pgprocno];
+ vacuumFlags = pgxact->vacuumFlags;
+
/*
- * We don't include our own XIDs (if any) in the snapshot, but we
- * must include them in xmin.
+ * Skip over backends doing logical decoding which manages xmin
+ * separately (check below) and ones running LAZY VACUUM.
*/
+ if (vacuumFlags & (PROC_IN_LOGICAL_DECODING | PROC_IN_VACUUM))
+ continue;
+
if (NormalTransactionIdPrecedes(xid, xmin))
xmin = xid;
- if (pgxact == MyPgXact)
- continue;
/* Add XID to snapshot. */
- snapshot->xip[count++] = xid;
+ xip[count++] = xid;
/*
* Save subtransaction XIDs if possible (if we've already
suboverflowed = true;
else
{
- int nxids = pgxact->nxids;
+ int nsubxids = pgxact->nxids;
- if (nxids > 0)
+ if (nsubxids > 0)
{
PGPROC *proc = &allProcs[pgprocno];
memcpy(snapshot->subxip + subcount,
(void *) proc->subxids.xids,
- nxids * sizeof(TransactionId));
- subcount += nxids;
+ nsubxids * sizeof(TransactionId));
+ subcount += nsubxids;
}
}
}
}
RecentXmin = xmin;
+ Assert(TransactionIdPrecedesOrEquals(TransactionXmin, RecentXmin));
snapshot->xmin = xmin;
snapshot->xmax = xmax;
* GetRunningTransactionData -- returns information about running transactions.
*
* Similar to GetSnapshotData but returns more information. We include
- * all PGXACTs with an assigned TransactionId, even VACUUM processes and
+ * all PGPROCs with an assigned TransactionId, even VACUUM processes and
* prepared transactions.
*
* We acquire XidGenLock and ProcArrayLock, but the caller is responsible for
* This is never executed during recovery so there is no need to look at
* KnownAssignedXids.
*
- * Dummy PGXACTs from prepared transaction are included, meaning that this
+ * Dummy PGPROCs from prepared transaction are included, meaning that this
* may return entries with duplicated TransactionId values coming from
* transaction finishing to prepare. Nothing is done about duplicated
* entries here to not hold on ProcArrayLock more than necessary.
static RunningTransactionsData CurrentRunningXactsData;
ProcArrayStruct *arrayP = procArray;
+ TransactionId *other_xids = ProcGlobal->xids;
RunningTransactions CurrentRunningXacts = &CurrentRunningXactsData;
TransactionId latestCompletedXid;
TransactionId oldestRunningXid;
TransactionId xid;
/* Fetch xid just once - see GetNewTransactionId */
- xid = UINT32_ACCESS_ONCE(pgxact->xid);
+ xid = UINT32_ACCESS_ONCE(other_xids[index]);
/*
* We don't need to store transactions that don't have a TransactionId
* GetOldestActiveTransactionId()
*
* Similar to GetSnapshotData but returns just oldestActiveXid. We include
- * all PGXACTs with an assigned TransactionId, even VACUUM processes.
+ * all PGPROCs with an assigned TransactionId, even VACUUM processes.
* We look at all databases, though there is no need to include WALSender
* since this has no effect on hot standby conflicts.
*
GetOldestActiveTransactionId(void)
{
ProcArrayStruct *arrayP = procArray;
+ TransactionId *other_xids = ProcGlobal->xids;
TransactionId oldestRunningXid;
int index;
LWLockAcquire(ProcArrayLock, LW_SHARED);
for (index = 0; index < arrayP->numProcs; index++)
{
- int pgprocno = arrayP->pgprocnos[index];
- PGXACT *pgxact = &allPgXact[pgprocno];
TransactionId xid;
/* Fetch xid just once - see GetNewTransactionId */
- xid = UINT32_ACCESS_ONCE(pgxact->xid);
+ xid = UINT32_ACCESS_ONCE(other_xids[index]);
if (!TransactionIdIsNormal(xid))
continue;
* If we're not in recovery, we walk over the procarray and collect the
* lowest xid. Since we're called with ProcArrayLock held and have
* acquired XidGenLock, no entries can vanish concurrently, since
- * PGXACT->xid is only set with XidGenLock held and only cleared with
- * ProcArrayLock held.
+ * ProcGlobal->xids[i] is only set with XidGenLock held and only cleared
+ * with ProcArrayLock held.
*
* In recovery we can't lower the safe value besides what we've computed
* above, so we'll have to wait a bit longer there. We unfortunately can
*/
if (!recovery_in_progress)
{
+ TransactionId *other_xids = ProcGlobal->xids;
+
/*
- * Spin over procArray collecting all min(PGXACT->xid)
+ * Spin over procArray collecting min(ProcGlobal->xids[i])
*/
for (index = 0; index < arrayP->numProcs; index++)
{
- int pgprocno = arrayP->pgprocnos[index];
- PGXACT *pgxact = &allPgXact[pgprocno];
TransactionId xid;
/* Fetch xid just once - see GetNewTransactionId */
- xid = UINT32_ACCESS_ONCE(pgxact->xid);
+ xid = UINT32_ACCESS_ONCE(other_xids[index]);
if (!TransactionIdIsNormal(xid))
continue;
{
int result = 0;
ProcArrayStruct *arrayP = procArray;
+ TransactionId *other_xids = ProcGlobal->xids;
int index;
if (xid == InvalidTransactionId) /* never match invalid xid */
{
int pgprocno = arrayP->pgprocnos[index];
PGPROC *proc = &allProcs[pgprocno];
- PGXACT *pgxact = &allPgXact[pgprocno];
- if (pgxact->xid == xid)
+ if (other_xids[index] == xid)
{
result = proc->pid;
break;
{
int pgprocno = arrayP->pgprocnos[index];
PGPROC *proc = &allProcs[pgprocno];
- PGXACT *pgxact = &allPgXact[pgprocno];
/*
* Since we're not holding a lock, need to be prepared to deal with
continue; /* do not count deleted entries */
if (proc == MyProc)
continue; /* do not count myself */
- if (pgxact->xid == InvalidTransactionId)
+ if (proc->xid == InvalidTransactionId)
continue; /* do not count if no XID assigned */
if (proc->pid == 0)
continue; /* do not count prepared xacts */
*
* Note that we do not have to be careful about memory ordering of our own
* reads wrt. GetNewTransactionId() here - only this process can modify
- * relevant fields of MyProc/MyPgXact. But we do have to be careful about
- * our own writes being well ordered.
+ * relevant fields of MyProc/ProcGlobal->xids[]. But we do have to be
+ * careful about our own writes being well ordered.
*/
LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
* In Hot Standby mode, we maintain a list of transactions that are (or were)
* running on the primary at the current point in WAL. These XIDs must be
* treated as running by standby transactions, even though they are not in
- * the standby server's PGXACT array.
+ * the standby server's PGPROC array.
*
* We record all XIDs that we know have been assigned. That includes all the
* XIDs seen in WAL records, plus all unobserved XIDs that we can deduce have
* distinguished from a real one at need by the fact that it has pid == 0.
* The semaphore and lock-activity fields in a prepared-xact PGPROC are unused,
* but its myProcLocks[] lists are valid.
+ *
+ * Mirrored fields:
+ *
+ * Some fields in PGPROC (see "mirrored in ..." comment) are mirrored into an
+ * element of more densely packed ProcGlobal arrays. These arrays are indexed
+ * by PGPROC->pgxactoff. Both copies need to be maintained coherently.
+ *
+ * NB: The pgxactoff indexed value can *never* be accessed without holding
+ * locks.
+ *
+ * See PROC_HDR for details.
*/
struct PGPROC
{
Latch procLatch; /* generic latch for process */
+
+ TransactionId xid; /* id of top-level transaction currently being
+ * executed by this proc, if running and XID
+ * is assigned; else InvalidTransactionId.
+ * mirrored in ProcGlobal->xids[pgxactoff] */
+
TransactionId xmin; /* minimal running XID as it was when we were
* starting our xact, excluding LAZY VACUUM:
* vacuum must not remove tuples deleted by
* being executed by this proc, if running;
* else InvalidLocalTransactionId */
int pid; /* Backend's process ID; 0 if prepared xact */
+
+ int pgxactoff; /* offset into various ProcGlobal->arrays
+ * with data mirrored from this PGPROC */
int pgprocno;
/* These fields are zero while a backend is still starting up: */
*/
typedef struct PGXACT
{
- TransactionId xid; /* id of top-level transaction currently being
- * executed by this proc, if running and XID
- * is assigned; else InvalidTransactionId */
-
uint8 vacuumFlags; /* vacuum-related flags, see above */
bool overflowed;
/*
* There is one ProcGlobal struct for the whole database cluster.
+ *
+ * Adding/Removing an entry into the procarray requires holding *both*
+ * ProcArrayLock and XidGenLock in exclusive mode (in that order). Both are
+ * needed because the dense arrays (see below) are accessed from
+ * GetNewTransactionId() and GetSnapshotData(), and we don't want to add
+ * further contention by both using the same lock. Adding/Removing a procarray
+ * entry is much less frequent.
+ *
+ * Some fields in PGPROC are mirrored into more densely packed arrays (e.g.
+ * xids), with one entry for each backend. These arrays only contain entries
+ * for PGPROCs that have been added to the shared array with ProcArrayAdd()
+ * (in contrast to PGPROC array which has unused PGPROCs interspersed).
+ *
+ * The dense arrays are indexed by PGPROC->pgxactoff. Any concurrent
+ * ProcArrayAdd() / ProcArrayRemove() can lead to pgxactoff of a procarray
+ * member to change. Therefore it is only safe to use PGPROC->pgxactoff to
+ * access the dense array while holding either ProcArrayLock or XidGenLock.
+ *
+ * As long as a PGPROC is in the procarray, the mirrored values need to be
+ * maintained in both places in a coherent manner.
+ *
+ * The denser separate arrays are beneficial for three main reasons: First, to
+ * allow for as tight loops accessing the data as possible. Second, to prevent
+ * updates of frequently changing data (e.g. xmin) from invalidating
+ * cachelines also containing less frequently changing data (e.g. xid,
+ * vacuumFlags). Third, to condense frequently accessed data into as few
+ * cachelines as possible.
+ *
+ * There are two main reasons to have the data mirrored between these dense
+ * arrays and PGPROC. First, as explained above, a PGPROC's array entries can
+ * only be accessed with either ProcArrayLock or XidGenLock held, whereas the
+ * PGPROC entries do not require that (obviously there may still be locking
+ * requirements around the individual field, separate from the concerns
+ * here). That is particularly important for a backend to efficiently check
+ * its own values, which it often can safely do without locking. Second, the
+ * PGPROC fields help avoid unnecessary accesses and modifications to the
+ * dense arrays. A backend's own PGPROC is more likely to be in a local cache,
+ * whereas the cachelines for the dense array will be modified by other
+ * backends (often removing it from the cache for other cores/sockets). At
+ * commit/abort time a check of the PGPROC value can avoid accessing/dirtying
+ * the corresponding array value.
+ *
+ * Basically it makes sense to access the PGPROC variable when checking a
+ * single backend's data, especially when already looking at the PGPROC for
+ * other reasons. It makes sense to look at the "dense" arrays if we
+ * need to look at many / most entries, because we then benefit from the
+ * reduced indirection and better cross-process cache-ability.
+ *
+ * When entering a PGPROC for 2PC transactions with ProcArrayAdd(), the data
+ * in the dense arrays is initialized from the PGPROC while it already holds
+ * ProcArrayLock.
*/
typedef struct PROC_HDR
{
PGPROC *allProcs;
/* Array of PGXACT structures (not including dummies for prepared txns) */
PGXACT *allPgXact;
+
+ /* Array mirroring PGPROC.xid for each PGPROC currently in the procarray */
+ TransactionId *xids;
+
/* Length of allProcs array */
uint32 allProcCount;
/* Head of list of free PGPROC structures */