Skip to content

Commit b4f697f

Browse files
committed
Fix Hot-Standby initialization of clog and subtrans.
These bugs can cause data loss on standbys started with hot_standby=on at the moment they start to accept read-only queries, by marking committed transactions as uncommitted. The likelihood of such corruptions is small unless the primary has a high transaction rate. 5a031a5 fixed bugs in HS's startup logic by maintaining less state until at least STANDBY_SNAPSHOT_PENDING state was reached, missing the fact that both clog and subtrans are written to before that. This only failed to fail in common cases because the usage of ExtendCLOG in procarray.c was superfluous since clog extensions are actually WAL-logged. f44eedc/I then tried to fix the missing extensions of pg_subtrans due to the former commit's changes - which are not WAL-logged - by performing the extensions when switching to a state > STANDBY_INITIALIZED and not performing xid assignments before that - again missing the fact that ExtendCLOG is unnecessary - but screwed up twice: once because latestObservedXid wasn't updated anymore in that state due to the earlier commit, and once by having an off-by-one error in the loop performing extensions. This means that whenever a CLOG_XACTS_PER_PAGE (32768 with default settings) boundary was crossed between the start of the checkpoint recovery started from and the first xl_running_xact record, old transactions' commit bits in pg_clog could be overwritten if they started and committed in that window. Fix this mess by not performing ExtendCLOG() in HS at all anymore, since it's unneeded and evidently dangerous, and by performing subtrans extensions even before reaching STANDBY_SNAPSHOT_PENDING. Analysis and patch by Andres Freund. Reported by Christophe Pettus. Backpatch down to 9.0, like the previous commit that caused this.
1 parent 32b3284 commit b4f697f

File tree

2 files changed

+41
-29
lines changed

2 files changed

+41
-29
lines changed

src/backend/access/transam/clog.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -593,7 +593,7 @@ ExtendCLOG(TransactionId newestXact)
593593
LWLockAcquire(CLogControlLock, LW_EXCLUSIVE);
594594

595595
/* Zero the page and make an XLOG entry about it */
596-
ZeroCLOGPage(pageno, !InRecovery);
596+
ZeroCLOGPage(pageno, true);
597597

598598
LWLockRelease(CLogControlLock);
599599
}

src/backend/storage/ipc/procarray.c

Lines changed: 40 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -439,7 +439,7 @@ ProcArrayClearTransaction(PGPROC *proc)
439439
* ProcArrayInitRecovery -- initialize recovery xid mgmt environment
440440
*
441441
* Remember up to where the startup process initialized the CLOG and subtrans
442-
* so we can ensure its initialized gaplessly up to the point where necessary
442+
* so we can ensure it's initialized gaplessly up to the point where necessary
443443
* while in recovery.
444444
*/
445445
void
@@ -449,9 +449,10 @@ ProcArrayInitRecovery(TransactionId initializedUptoXID)
449449
Assert(TransactionIdIsNormal(initializedUptoXID));
450450

451451
/*
452-
* we set latestObservedXid to the xid SUBTRANS has been initialized upto
453-
* so we can extend it from that point onwards when we reach a consistent
454-
* state in ProcArrayApplyRecoveryInfo().
452+
* we set latestObservedXid to the xid SUBTRANS has been initialized upto,
453+
* so we can extend it from that point onwards in
454+
* RecordKnownAssignedTransactionIds, and when we get consistent in
455+
* ProcArrayApplyRecoveryInfo().
455456
*/
456457
latestObservedXid = initializedUptoXID;
457458
TransactionIdRetreat(latestObservedXid);
@@ -620,17 +621,23 @@ ProcArrayApplyRecoveryInfo(RunningTransactions running)
620621
pfree(xids);
621622

622623
/*
623-
* latestObservedXid is set to the the point where SUBTRANS was started up
624-
* to, initialize subtrans from thereon, up to nextXid - 1.
624+
* latestObservedXid is at least set to the the point where SUBTRANS was
625+
* started up to (c.f. ProcArrayInitRecovery()) or to the biggest xid
626+
* RecordKnownAssignedTransactionIds() was called for. Initialize
627+
* subtrans from thereon, up to nextXid - 1.
628+
*
629+
* We need to duplicate parts of RecordKnownAssignedTransactionId() here,
630+
* because we've just added xids to the known assigned xids machinery that
631+
* haven't gone through RecordKnownAssignedTransactionId().
625632
*/
626633
Assert(TransactionIdIsNormal(latestObservedXid));
634+
TransactionIdAdvance(latestObservedXid);
627635
while (TransactionIdPrecedes(latestObservedXid, running->nextXid))
628636
{
629-
ExtendCLOG(latestObservedXid);
630637
ExtendSUBTRANS(latestObservedXid);
631-
632638
TransactionIdAdvance(latestObservedXid);
633639
}
640+
TransactionIdRetreat(latestObservedXid); /* = running->nextXid - 1 */
634641

635642
/* ----------
636643
* Now we've got the running xids we need to set the global values that
@@ -705,10 +712,6 @@ ProcArrayApplyXidAssignment(TransactionId topxid,
705712

706713
Assert(standbyState >= STANDBY_INITIALIZED);
707714

708-
/* can't do anything useful unless we have more state setup */
709-
if (standbyState == STANDBY_INITIALIZED)
710-
return;
711-
712715
max_xid = TransactionIdLatest(topxid, nsubxids, subxids);
713716

714717
/*
@@ -735,6 +738,10 @@ ProcArrayApplyXidAssignment(TransactionId topxid,
735738
for (i = 0; i < nsubxids; i++)
736739
SubTransSetParent(subxids[i], topxid, false);
737740

741+
/* KnownAssignedXids isn't maintained yet, so we're done for now */
742+
if (standbyState == STANDBY_INITIALIZED)
743+
return;
744+
738745
/*
739746
* Uses same locking as transaction commit
740747
*/
@@ -2427,18 +2434,11 @@ RecordKnownAssignedTransactionIds(TransactionId xid)
24272434
{
24282435
Assert(standbyState >= STANDBY_INITIALIZED);
24292436
Assert(TransactionIdIsValid(xid));
2437+
Assert(TransactionIdIsValid(latestObservedXid));
24302438

24312439
elog(trace_recovery(DEBUG4), "record known xact %u latestObservedXid %u",
24322440
xid, latestObservedXid);
24332441

2434-
/*
2435-
* If the KnownAssignedXids machinery isn't up yet, do nothing.
2436-
*/
2437-
if (standbyState <= STANDBY_INITIALIZED)
2438-
return;
2439-
2440-
Assert(TransactionIdIsValid(latestObservedXid));
2441-
24422442
/*
24432443
* When a newly observed xid arrives, it is frequently the case that it is
24442444
* *not* the next xid in sequence. When this occurs, we must treat the
@@ -2449,22 +2449,34 @@ RecordKnownAssignedTransactionIds(TransactionId xid)
24492449
TransactionId next_expected_xid;
24502450

24512451
/*
2452-
* Extend clog and subtrans like we do in GetNewTransactionId() during
2453-
* normal operation using individual extend steps. Typical case
2454-
* requires almost no activity.
2452+
* Extend subtrans like we do in GetNewTransactionId() during normal
2453+
* operation using individual extend steps. Note that we do not need
2454+
* to extend clog since its extensions are WAL logged.
2455+
*
2456+
* This part has to be done regardless of standbyState since we
2457+
* immediately start assigning subtransactions to their toplevel
2458+
* transactions.
24552459
*/
24562460
next_expected_xid = latestObservedXid;
2457-
TransactionIdAdvance(next_expected_xid);
2458-
while (TransactionIdPrecedesOrEquals(next_expected_xid, xid))
2461+
while (TransactionIdPrecedes(next_expected_xid, xid))
24592462
{
2460-
ExtendCLOG(next_expected_xid);
2463+
TransactionIdAdvance(next_expected_xid);
24612464
ExtendSUBTRANS(next_expected_xid);
2465+
}
2466+
Assert(next_expected_xid == xid);
24622467

2463-
TransactionIdAdvance(next_expected_xid);
2468+
/*
2469+
* If the KnownAssignedXids machinery isn't up yet, there's nothing
2470+
* more to do since we don't track assigned xids yet.
2471+
*/
2472+
if (standbyState <= STANDBY_INITIALIZED)
2473+
{
2474+
latestObservedXid = xid;
2475+
return;
24642476
}
24652477

24662478
/*
2467-
* Add the new xids onto the KnownAssignedXids array.
2479+
* Add (latestObservedXid, xid] onto the KnownAssignedXids array.
24682480
*/
24692481
next_expected_xid = latestObservedXid;
24702482
TransactionIdAdvance(next_expected_xid);

0 commit comments

Comments
 (0)