Skip to content

Commit d3944c3

Browse files
Fix undercounting in VACUUM VERBOSE output.
The logic for determining how many nbtree pages in an index are deleted pages sometimes undercounted pages. Pages that were deleted by the current VACUUM operation (as opposed to some previous VACUUM operation whose deleted pages have yet to be reused) were sometimes overlooked. The final count is exposed to users through VACUUM VERBOSE's "%u index pages have been deleted" output. btvacuumpage() avoided double-counting when _bt_pagedel() deleted more than one page by assuming that only one page was deleted, and that the additional deleted pages would get picked up during a future call to btvacuumpage() by the same VACUUM operation. _bt_pagedel() can legitimately delete pages that the btvacuumscan() scan will not visit again, though, so that assumption was slightly faulty. Fix the accounting by teaching _bt_pagedel() about its caller's requirements. It now only reports on pages that it knows btvacuumscan() won't visit again (including the current btvacuumpage() page), so everything works out in the end. This bug has been around forever. Only backpatch to v11, though, to keep _bt_pagedel() in sync on the branches that have today's bugfix commit b0229f2. Note that this commit changes the signature of _bt_pagedel(), just like commit b0229f2. Author: Peter Geoghegan Reviewed-By: Masahiko Sawada Discussion: https://postgr.es/m/CAH2-WzkrXBcMQWAYUJMFTTvzx_r4q=pYSjDe07JnUXhe+OZnJA@mail.gmail.com Backpatch: 11-
1 parent e4fa6c9 commit d3944c3

File tree

3 files changed

+37
-18
lines changed

3 files changed

+37
-18
lines changed

src/backend/access/nbtree/nbtpage.c

Lines changed: 29 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,8 @@
3535

3636
static bool _bt_mark_page_halfdead(Relation rel, Buffer leafbuf, BTStack stack);
3737
static bool _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf,
38-
bool *rightsib_empty, TransactionId *oldestBtpoXact);
38+
BlockNumber scanblkno, bool *rightsib_empty,
39+
TransactionId *oldestBtpoXact, uint32 *ndeleted);
3940
static bool _bt_lock_branch_parent(Relation rel, BlockNumber child,
4041
BTStack stack, Buffer *topparent, OffsetNumber *topoff,
4142
BlockNumber *target, BlockNumber *rightsib);
@@ -1236,7 +1237,9 @@ _bt_lock_branch_parent(Relation rel, BlockNumber child, BTStack stack,
12361237
*
12371238
* Returns the number of pages successfully deleted (zero if page cannot
12381239
* be deleted now; could be more than one if parent or right sibling pages
1239-
* were deleted too).
1240+
* were deleted too). Note that this does not include pages that we delete
1241+
* that the btvacuumscan scan has yet to reach; they'll get counted later
1242+
* instead.
12401243
*
12411244
* Maintains *oldestBtpoXact for any pages that get deleted. Caller is
12421245
* responsible for maintaining *oldestBtpoXact in the case of pages that were
@@ -1246,15 +1249,21 @@ _bt_lock_branch_parent(Relation rel, BlockNumber child, BTStack stack,
12461249
* carefully, it's better to run it in a temp context that can be reset
12471250
* frequently.
12481251
*/
1249-
int
1252+
uint32
12501253
_bt_pagedel(Relation rel, Buffer leafbuf, TransactionId *oldestBtpoXact)
12511254
{
1252-
int ndeleted = 0;
1255+
uint32 ndeleted = 0;
12531256
BlockNumber rightsib;
12541257
bool rightsib_empty;
12551258
Page page;
12561259
BTPageOpaque opaque;
12571260

1261+
/*
1262+
* Save original leafbuf block number from caller. Only deleted blocks
1263+
* that are <= scanblkno get counted in ndeleted return value.
1264+
*/
1265+
BlockNumber scanblkno = BufferGetBlockNumber(leafbuf);
1266+
12581267
/*
12591268
* "stack" is a search stack leading (approximately) to the target page.
12601269
* It is initially NULL, but when iterating, we keep it to avoid
@@ -1305,8 +1314,9 @@ _bt_pagedel(Relation rel, Buffer leafbuf, TransactionId *oldestBtpoXact)
13051314
if (P_ISDELETED(opaque))
13061315
ereport(LOG,
13071316
(errcode(ERRCODE_INDEX_CORRUPTED),
1308-
errmsg_internal("found deleted block %u while following right link in index \"%s\"",
1317+
errmsg_internal("found deleted block %u while following right link from block %u in index \"%s\"",
13091318
BufferGetBlockNumber(leafbuf),
1319+
scanblkno,
13101320
RelationGetRelationName(rel))));
13111321

13121322
_bt_relbuf(rel, leafbuf);
@@ -1456,13 +1466,13 @@ _bt_pagedel(Relation rel, Buffer leafbuf, TransactionId *oldestBtpoXact)
14561466
while (P_ISHALFDEAD(opaque))
14571467
{
14581468
/* Check for interrupts in _bt_unlink_halfdead_page */
1459-
if (!_bt_unlink_halfdead_page(rel, leafbuf, &rightsib_empty,
1460-
oldestBtpoXact))
1469+
if (!_bt_unlink_halfdead_page(rel, leafbuf, scanblkno,
1470+
&rightsib_empty, oldestBtpoXact,
1471+
&ndeleted))
14611472
{
14621473
/* _bt_unlink_halfdead_page failed, released buffer */
14631474
return ndeleted;
14641475
}
1465-
ndeleted++;
14661476
}
14671477

14681478
Assert(P_ISLEAF(opaque) && P_ISDELETED(opaque));
@@ -1704,8 +1714,9 @@ _bt_mark_page_halfdead(Relation rel, Buffer leafbuf, BTStack stack)
17041714
* to avoid having to reacquire a lock we already released).
17051715
*/
17061716
static bool
1707-
_bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty,
1708-
TransactionId *oldestBtpoXact)
1717+
_bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, BlockNumber scanblkno,
1718+
bool *rightsib_empty, TransactionId *oldestBtpoXact,
1719+
uint32 *ndeleted)
17091720
{
17101721
BlockNumber leafblkno = BufferGetBlockNumber(leafbuf);
17111722
BlockNumber leafleftsib;
@@ -2089,6 +2100,14 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty,
20892100
TransactionIdPrecedes(opaque->btpo.xact, *oldestBtpoXact))
20902101
*oldestBtpoXact = opaque->btpo.xact;
20912102

2103+
/*
2104+
* If btvacuumscan won't revisit this page in a future btvacuumpage call
2105+
* and count it as deleted then, we count it as deleted by current
2106+
* btvacuumpage call
2107+
*/
2108+
if (target <= scanblkno)
2109+
(*ndeleted)++;
2110+
20922111
/*
20932112
* Release the target, if it was not the leaf block. The leaf is always
20942113
* kept locked.

src/backend/access/nbtree/nbtree.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1340,17 +1340,17 @@ btvacuumpage(BTVacState *vstate, BlockNumber blkno, BlockNumber orig_blkno)
13401340
if (delete_now)
13411341
{
13421342
MemoryContext oldcontext;
1343-
int ndel;
13441343

13451344
/* Run pagedel in a temp context to avoid memory leakage */
13461345
MemoryContextReset(vstate->pagedelcontext);
13471346
oldcontext = MemoryContextSwitchTo(vstate->pagedelcontext);
13481347

1349-
ndel = _bt_pagedel(rel, buf, &vstate->oldestBtpoXact);
1350-
1351-
/* count only this page, else may double-count parent */
1352-
if (ndel)
1353-
stats->pages_deleted++;
1348+
/*
1349+
* We trust the _bt_pagedel return value because it does not include
1350+
* any page that a future call here from btvacuumscan is expected to
1351+
* count. There will be no double-counting.
1352+
*/
1353+
stats->pages_deleted += _bt_pagedel(rel, buf, &vstate->oldestBtpoXact);
13541354

13551355
MemoryContextSwitchTo(oldcontext);
13561356
/* pagedel released buffer, so we shouldn't */

src/include/access/nbtree.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -553,8 +553,8 @@ extern void _bt_delitems_delete(Relation rel, Buffer buf,
553553
extern void _bt_delitems_vacuum(Relation rel, Buffer buf,
554554
OffsetNumber *itemnos, int nitems,
555555
BlockNumber lastBlockVacuumed);
556-
extern int _bt_pagedel(Relation rel, Buffer leafbuf,
557-
TransactionId *oldestBtpoXact);
556+
extern uint32 _bt_pagedel(Relation rel, Buffer leafbuf,
557+
TransactionId *oldestBtpoXact);
558558

559559
/*
560560
* prototypes for functions in nbtsearch.c

0 commit comments

Comments
 (0)