Skip to content

Commit 2d24ca0

Browse files
committed
Fix handling of WAL segments ready to be archived during crash recovery
78ea8b5 has fixed an issue related to the recycling of WAL segments on standbys depending on archive_mode. However, it has introduced a regression with the handling of WAL segments ready to be archived during crash recovery, causing those files to be recycled without getting archived.

This commit fixes the regression by tracking in shared memory if a live cluster is either in crash recovery or archive recovery as the handling of WAL segments ready to be archived is different in both cases (those WAL segments should not be removed during crash recovery), and by using this new shared memory state to decide if a segment can be recycled or not. Previously, it was not possible to know if a cluster was in crash recovery or archive recovery as the shared state was able to track only if recovery was happening or not, leading to the problem.

A set of TAP tests is added to close the gap here, making sure that WAL segments ready to be archived are correctly handled when a cluster is in archive or crash recovery with archive_mode set to "on" or "always", for both standby and primary.

Reported-by: Benoît Lobréau
Author: Jehan-Guillaume de Rorthais
Reviewed-by: Kyotaro Horiguchi, Fujii Masao, Michael Paquier
Discussion: https://postgr.es/m/20200331172229.40ee00dc@firost
Backpatch-through: 9.5
1 parent 34b7020 commit 2d24ca0

File tree

4 files changed

+285
-16
lines changed

4 files changed

+285
-16
lines changed

src/backend/access/transam/xlog.c

Lines changed: 48 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -211,8 +211,9 @@ static TimeLineID receiveTLI = 0;
211211
static bool lastFullPageWrites;
212212

213213
/*
214-
* Local copy of SharedRecoveryInProgress variable. True actually means "not
215-
* known, need to check the shared state".
214+
* Local copy of the state tracked by SharedRecoveryState in shared memory.
215+
* It is false if SharedRecoveryState is RECOVERY_STATE_DONE. True actually
216+
* means "not known, need to check the shared state".
216217
*/
217218
static bool LocalRecoveryInProgress = true;
218219

@@ -637,10 +638,10 @@ typedef struct XLogCtlData
637638
char archiveCleanupCommand[MAXPGPATH];
638639

639640
/*
640-
* SharedRecoveryInProgress indicates if we're still in crash or archive
641+
* SharedRecoveryState indicates if we're still in crash or archive
641642
* recovery. Protected by info_lck.
642643
*/
643-
bool SharedRecoveryInProgress;
644+
RecoveryState SharedRecoveryState;
644645

645646
/*
646647
* SharedHotStandbyActive indicates if we're still in crash or archive
@@ -4306,6 +4307,16 @@ ReadRecord(XLogReaderState *xlogreader, XLogRecPtr RecPtr, int emode,
43064307
updateMinRecoveryPoint = true;
43074308

43084309
UpdateControlFile();
4310+
4311+
/*
4312+
* We update SharedRecoveryState while holding the lock on
4313+
* ControlFileLock so both states are consistent in shared
4314+
* memory.
4315+
*/
4316+
SpinLockAcquire(&XLogCtl->info_lck);
4317+
XLogCtl->SharedRecoveryState = RECOVERY_STATE_ARCHIVE;
4318+
SpinLockRelease(&XLogCtl->info_lck);
4319+
43094320
LWLockRelease(ControlFileLock);
43104321

43114322
CheckRecoveryConsistency();
@@ -5047,7 +5058,7 @@ XLOGShmemInit(void)
50475058
* in additional info.)
50485059
*/
50495060
XLogCtl->XLogCacheBlck = XLOGbuffers - 1;
5050-
XLogCtl->SharedRecoveryInProgress = true;
5061+
XLogCtl->SharedRecoveryState = RECOVERY_STATE_CRASH;
50515062
XLogCtl->SharedHotStandbyActive = false;
50525063
XLogCtl->WalWriterSleeping = false;
50535064

@@ -6859,7 +6870,13 @@ StartupXLOG(void)
68596870
*/
68606871
dbstate_at_startup = ControlFile->state;
68616872
if (InArchiveRecovery)
6873+
{
68626874
ControlFile->state = DB_IN_ARCHIVE_RECOVERY;
6875+
6876+
SpinLockAcquire(&XLogCtl->info_lck);
6877+
XLogCtl->SharedRecoveryState = RECOVERY_STATE_ARCHIVE;
6878+
SpinLockRelease(&XLogCtl->info_lck);
6879+
}
68636880
else
68646881
{
68656882
ereport(LOG,
@@ -6872,6 +6889,10 @@ StartupXLOG(void)
68726889
ControlFile->checkPointCopy.ThisTimeLineID,
68736890
recoveryTargetTLI)));
68746891
ControlFile->state = DB_IN_CRASH_RECOVERY;
6892+
6893+
SpinLockAcquire(&XLogCtl->info_lck);
6894+
XLogCtl->SharedRecoveryState = RECOVERY_STATE_CRASH;
6895+
SpinLockRelease(&XLogCtl->info_lck);
68756896
}
68766897
ControlFile->checkPoint = checkPointLoc;
68776898
ControlFile->checkPointCopy = checkPoint;
@@ -7896,7 +7917,7 @@ StartupXLOG(void)
78967917
ControlFile->time = (pg_time_t) time(NULL);
78977918

78987919
SpinLockAcquire(&XLogCtl->info_lck);
7899-
XLogCtl->SharedRecoveryInProgress = false;
7920+
XLogCtl->SharedRecoveryState = RECOVERY_STATE_DONE;
79007921
SpinLockRelease(&XLogCtl->info_lck);
79017922

79027923
UpdateControlFile();
@@ -8042,7 +8063,7 @@ RecoveryInProgress(void)
80428063
*/
80438064
volatile XLogCtlData *xlogctl = XLogCtl;
80448065

8045-
LocalRecoveryInProgress = xlogctl->SharedRecoveryInProgress;
8066+
LocalRecoveryInProgress = (xlogctl->SharedRecoveryState != RECOVERY_STATE_DONE);
80468067

80478068
/*
80488069
* Initialize TimeLineID and RedoRecPtr when we discover that recovery
@@ -8054,8 +8075,8 @@ RecoveryInProgress(void)
80548075
{
80558076
/*
80568077
* If we just exited recovery, make sure we read TimeLineID and
8057-
* RedoRecPtr after SharedRecoveryInProgress (for machines with
8058-
* weak memory ordering).
8078+
* RedoRecPtr after SharedRecoveryState (for machines with weak
8079+
* memory ordering).
80598080
*/
80608081
pg_memory_barrier();
80618082
InitXLOGAccess();
@@ -8071,6 +8092,24 @@ RecoveryInProgress(void)
80718092
}
80728093
}
80738094

8095+
/*
8096+
* Returns the current recovery state, read from shared memory under info_lck.
8097+
*
8098+
* The returned state is kept consistent with the contents of the control
8099+
* file. See details about the possible values of RecoveryState in xlog.h.
8100+
*/
8101+
RecoveryState
8102+
GetRecoveryState(void)
8103+
{
8104+
RecoveryState retval;
8105+
8106+
SpinLockAcquire(&XLogCtl->info_lck);
8107+
retval = XLogCtl->SharedRecoveryState; /* copy out while the spinlock is held */
8108+
SpinLockRelease(&XLogCtl->info_lck);
8109+
8110+
return retval;
8111+
}
8112+
80748113
/*
80758114
* Is HotStandby active yet? This is only important in special backends
80768115
* since normal backends won't ever be able to connect until this returns

src/backend/access/transam/xlogarchive.c

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -610,18 +610,25 @@ XLogArchiveCheckDone(const char *xlog)
610610
{
611611
char archiveStatusPath[MAXPGPATH];
612612
struct stat stat_buf;
613-
bool inRecovery = RecoveryInProgress();
613+
614+
/* The file is always deletable if archive_mode is "off". */
615+
if (!XLogArchivingActive())
616+
return true;
614617

615618
/*
616-
* The file is always deletable if archive_mode is "off". On standbys
617-
* archiving is disabled if archive_mode is "on", and enabled with
618-
* "always". On a primary, archiving is enabled if archive_mode is "on"
619-
* or "always".
619+
* During archive recovery, the file is deletable if archive_mode is not
620+
* "always".
620621
*/
621-
if (!((XLogArchivingActive() && !inRecovery) ||
622-
(XLogArchivingAlways() && inRecovery)))
622+
if (!XLogArchivingAlways() &&
623+
GetRecoveryState() == RECOVERY_STATE_ARCHIVE)
623624
return true;
624625

626+
/*
627+
* At this point of the logic, note that we are either a primary with
628+
* archive_mode set to "on" or "always", or a standby with archive_mode
629+
* set to "always".
630+
*/
631+
625632
/* First check for .done --- this means archiver is done with it */
626633
StatusFilePath(archiveStatusPath, xlog, ".done");
627634
if (stat(archiveStatusPath, &stat_buf) == 0)

src/include/access/xlog.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,14 @@ typedef enum WalLevel
129129
WAL_LEVEL_LOGICAL
130130
} WalLevel;
131131

132+
/* Recovery states, as stored in shared memory (XLogCtl->SharedRecoveryState) */
133+
typedef enum RecoveryState
134+
{
135+
RECOVERY_STATE_CRASH = 0, /* crash recovery */
136+
RECOVERY_STATE_ARCHIVE, /* archive recovery */
137+
RECOVERY_STATE_DONE /* recovery is over; currently in production */
138+
} RecoveryState;
139+
132140
extern PGDLLIMPORT int wal_level;
133141

134142
/* Is WAL archiving enabled (always or only while server is running normally)? */
@@ -243,6 +251,7 @@ extern const char *xlog_identify(uint8 info);
243251
extern void issue_xlog_fsync(int fd, XLogSegNo segno);
244252

245253
extern bool RecoveryInProgress(void);
254+
extern RecoveryState GetRecoveryState(void);
246255
extern bool HotStandbyActive(void);
247256
extern bool HotStandbyActiveInReplay(void);
248257
extern bool XLogInsertAllowed(void);

0 commit comments

Comments
 (0)