Skip to content

Commit 8e7e672

Browse files
committed
WAL-log inplace update before revealing it to other sessions.
A buffer lock won't stop a reader having already checked tuple visibility. If a vac_update_datfrozenid() and then a crash happened during inplace update of a relfrozenxid value, datfrozenxid could overtake relfrozenxid. That could lead to "could not access status of transaction" errors. Back-patch to v12 (all supported versions). In v14 and earlier, this also back-patches the assertion removal from commit 7fcf2fa. Discussion: https://postgr.es/m/20240620012908.92.nmisch@google.com
1 parent 243e9b4 commit 8e7e672

File tree

2 files changed

+46
-16
lines changed

2 files changed

+46
-16
lines changed

src/backend/access/heap/README.tuplock

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,4 @@ Inplace updates create an exception to the rule that tuple data won't change
203203
under a reader holding a pin. A reader of a heap_fetch() result tuple may
204204
witness a torn read. Current inplace-updated fields are aligned and are no
205205
wider than four bytes, and current readers don't need consistency across
206-
fields. Hence, they get by with just fetching each field once. XXX such a
207-
caller may also read a value that has not reached WAL; see
208-
systable_inplace_update_finish().
206+
fields. Hence, they get by with just fetching each field once.

src/backend/access/heap/heapam.c

Lines changed: 45 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -6326,6 +6326,8 @@ heap_inplace_update_and_unlock(Relation relation,
63266326
HeapTupleHeader htup = oldtup->t_data;
63276327
uint32 oldlen;
63286328
uint32 newlen;
6329+
char *dst;
6330+
char *src;
63296331
int nmsgs = 0;
63306332
SharedInvalidationMessage *invalMessages = NULL;
63316333
bool RelcacheInitFileInval = false;
@@ -6336,6 +6338,9 @@ heap_inplace_update_and_unlock(Relation relation,
63366338
if (oldlen != newlen || htup->t_hoff != tuple->t_data->t_hoff)
63376339
elog(ERROR, "wrong tuple length");
63386340

6341+
dst = (char *) htup + htup->t_hoff;
6342+
src = (char *) tuple->t_data + tuple->t_data->t_hoff;
6343+
63396344
/*
63406345
* Construct shared cache inval if necessary. Note that because we only
63416346
* pass the new version of the tuple, this mustn't be used for any
@@ -6359,15 +6364,15 @@ heap_inplace_update_and_unlock(Relation relation,
63596364
*/
63606365
PreInplace_Inval();
63616366

6362-
/* NO EREPORT(ERROR) from here till changes are logged */
6363-
START_CRIT_SECTION();
6364-
6365-
memcpy((char *) htup + htup->t_hoff,
6366-
(char *) tuple->t_data + tuple->t_data->t_hoff,
6367-
newlen);
6368-
63696367
/*----------
6370-
* XXX A crash here can allow datfrozenxid() to get ahead of relfrozenxid:
6368+
* NO EREPORT(ERROR) from here till changes are complete
6369+
*
6370+
* Our buffer lock won't stop a reader having already pinned and checked
6371+
* visibility for this tuple. Hence, we write WAL first, then mutate the
6372+
* buffer. Like in MarkBufferDirtyHint() or RecordTransactionCommit(),
6373+
* checkpoint delay makes that acceptable. With the usual order of
6374+
* changes, a crash after memcpy() and before XLogInsert() could allow
6375+
* datfrozenxid to overtake relfrozenxid:
63716376
*
63726377
* ["D" is a VACUUM (ONLY_DATABASE_STATS)]
63736378
* ["R" is a VACUUM tbl]
@@ -6377,14 +6382,28 @@ heap_inplace_update_and_unlock(Relation relation,
63776382
* D: raise pg_database.datfrozenxid, XLogInsert(), finish
63786383
* [crash]
63796384
* [recovery restores datfrozenxid w/o relfrozenxid]
6385+
*
6386+
* Like in MarkBufferDirtyHint() subroutine XLogSaveBufferForHint(), copy
6387+
* the buffer to the stack before logging. Here, that facilitates a FPI
6388+
* of the post-mutation block before we accept other sessions seeing it.
63806389
*/
6381-
6382-
MarkBufferDirty(buffer);
6390+
Assert((MyProc->delayChkptFlags & DELAY_CHKPT_START) == 0);
6391+
START_CRIT_SECTION();
6392+
MyProc->delayChkptFlags |= DELAY_CHKPT_START;
63836393

63846394
/* XLOG stuff */
63856395
if (RelationNeedsWAL(relation))
63866396
{
63876397
xl_heap_inplace xlrec;
6398+
PGAlignedBlock copied_buffer;
6399+
char *origdata = (char *) BufferGetBlock(buffer);
6400+
Page page = BufferGetPage(buffer);
6401+
uint16 lower = ((PageHeader) page)->pd_lower;
6402+
uint16 upper = ((PageHeader) page)->pd_upper;
6403+
uintptr_t dst_offset_in_block;
6404+
RelFileLocator rlocator;
6405+
ForkNumber forkno;
6406+
BlockNumber blkno;
63886407
XLogRecPtr recptr;
63896408

63906409
xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
@@ -6399,16 +6418,28 @@ heap_inplace_update_and_unlock(Relation relation,
63996418
XLogRegisterData((char *) invalMessages,
64006419
nmsgs * sizeof(SharedInvalidationMessage));
64016420

6402-
XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
6403-
XLogRegisterBufData(0, (char *) htup + htup->t_hoff, newlen);
6421+
/* register block matching what buffer will look like after changes */
6422+
memcpy(copied_buffer.data, origdata, lower);
6423+
memcpy(copied_buffer.data + upper, origdata + upper, BLCKSZ - upper);
6424+
dst_offset_in_block = dst - origdata;
6425+
memcpy(copied_buffer.data + dst_offset_in_block, src, newlen);
6426+
BufferGetTag(buffer, &rlocator, &forkno, &blkno);
6427+
Assert(forkno == MAIN_FORKNUM);
6428+
XLogRegisterBlock(0, &rlocator, forkno, blkno, copied_buffer.data,
6429+
REGBUF_STANDARD);
6430+
XLogRegisterBufData(0, src, newlen);
64046431

64056432
/* inplace updates aren't decoded atm, don't log the origin */
64066433

64076434
recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_INPLACE);
64086435

6409-
PageSetLSN(BufferGetPage(buffer), recptr);
6436+
PageSetLSN(page, recptr);
64106437
}
64116438

6439+
memcpy(dst, src, newlen);
6440+
6441+
MarkBufferDirty(buffer);
6442+
64126443
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
64136444

64146445
/*
@@ -6421,6 +6452,7 @@ heap_inplace_update_and_unlock(Relation relation,
64216452
*/
64226453
AtInplace_Inval();
64236454

6455+
MyProc->delayChkptFlags &= ~DELAY_CHKPT_START;
64246456
END_CRIT_SECTION();
64256457
UnlockTuple(relation, &tuple->t_self, InplaceUpdateTupleLock);
64266458

0 commit comments

Comments
 (0)