Skip to content

Commit 8900b5a

Browse files
committed
Detect the deadlocks between backends and the startup process.
The deadlocks that the recovery conflict on lock is involved in can happen between hot-standby backends and the startup process. If a backend takes an access exclusive lock on the table and which finally triggers the deadlock, that deadlock can be detected as expected. On the other hand, previously, if the startup process took an access exclusive lock and which finally triggered the deadlock, that deadlock could not be detected and could remain even after deadlock_timeout passed. This is a bug. The cause of this bug was that the code for handling the recovery conflict on lock didn't take care of deadlock case at all. It assumed that deadlocks involving the startup process and backends were able to be detected by the deadlock detector invoked within backends. But this assumption was incorrect. The startup process also should have invoked the deadlock detector if necessary. To fix this bug, this commit makes the startup process invoke the deadlock detector if deadlock_timeout is reached while handling the recovery conflict on lock. Specifically, in that case, the startup process requests all the backends holding the conflicting locks to check themselves for deadlocks. Back-patch to v9.6. v9.5 has also this bug, but per discussion we decided not to back-patch the fix to v9.5. Because v9.5 doesn't have some infrastructure codes (e.g., 37c5486) that this bug fix patch depends on. We can apply those codes for the back-patch, but since the next minor version release is the final one for v9.5, it's risky to do that. If we unexpectedly introduce new bug to v9.5 by the back-patch, there is no chance to fix that. We determined that the back-patch to v9.5 would give more risk than gain. Author: Fujii Masao Reviewed-by: Bertrand Drouvot, Masahiko Sawada, Kyotaro Horiguchi Discussion: https://postgr.es/m/[email protected]
1 parent e02e840 commit 8900b5a

File tree

5 files changed

+141
-32
lines changed

5 files changed

+141
-32
lines changed

src/backend/storage/ipc/procarray.c

+8-1
Original file line numberDiff line numberDiff line change
@@ -3324,6 +3324,13 @@ GetConflictingVirtualXIDs(TransactionId limitXmin, Oid dbOid)
33243324
*/
33253325
pid_t
33263326
CancelVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode)
3327+
{
3328+
return SignalVirtualTransaction(vxid, sigmode, true);
3329+
}
3330+
3331+
pid_t
3332+
SignalVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode,
3333+
bool conflictPending)
33273334
{
33283335
ProcArrayStruct *arrayP = procArray;
33293336
int index;
@@ -3342,7 +3349,7 @@ CancelVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode)
33423349
if (procvxid.backendId == vxid.backendId &&
33433350
procvxid.localTransactionId == vxid.localTransactionId)
33443351
{
3345-
proc->recoveryConflictPending = true;
3352+
proc->recoveryConflictPending = conflictPending;
33463353
pid = proc->pid;
33473354
if (pid != 0)
33483355
{

src/backend/storage/ipc/standby.c

+114-29
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,10 @@ int max_standby_streaming_delay = 30 * 1000;
4242

4343
static HTAB *RecoveryLockLists;
4444

45+
/* Flags set by timeout handlers */
46+
static volatile sig_atomic_t got_standby_deadlock_timeout = false;
47+
static volatile sig_atomic_t got_standby_lock_timeout = false;
48+
4549
static void ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
4650
ProcSignalReason reason,
4751
uint32 wait_event_info,
@@ -397,8 +401,10 @@ ResolveRecoveryConflictWithDatabase(Oid dbid)
397401
* lock. As we are already queued to be granted the lock, no new lock
398402
* requests conflicting with ours will be granted in the meantime.
399403
*
400-
* Deadlocks involving the Startup process and an ordinary backend process
401-
* will be detected by the deadlock detector within the ordinary backend.
404+
* We also must check for deadlocks involving the Startup process and
405+
* hot-standby backend processes. If deadlock_timeout is reached in
406+
* this function, all the backends holding the conflicting locks are
407+
* requested to check themselves for deadlocks.
402408
*/
403409
void
404410
ResolveRecoveryConflictWithLock(LOCKTAG locktag)
@@ -409,7 +415,7 @@ ResolveRecoveryConflictWithLock(LOCKTAG locktag)
409415

410416
ltime = GetStandbyLimitTime();
411417

412-
if (GetCurrentTimestamp() >= ltime)
418+
if (GetCurrentTimestamp() >= ltime && ltime != 0)
413419
{
414420
/*
415421
* We're already behind, so clear a path as quickly as possible.
@@ -431,26 +437,85 @@ ResolveRecoveryConflictWithLock(LOCKTAG locktag)
431437
else
432438
{
433439
/*
434-
* Wait (or wait again) until ltime
440+
* Wait (or wait again) until ltime, and check for deadlocks as well
441+
* if we will be waiting longer than deadlock_timeout
435442
*/
436-
EnableTimeoutParams timeouts[1];
443+
EnableTimeoutParams timeouts[2];
444+
int cnt = 0;
445+
446+
if (ltime != 0)
447+
{
448+
got_standby_lock_timeout = false;
449+
timeouts[cnt].id = STANDBY_LOCK_TIMEOUT;
450+
timeouts[cnt].type = TMPARAM_AT;
451+
timeouts[cnt].fin_time = ltime;
452+
cnt++;
453+
}
437454

438-
timeouts[0].id = STANDBY_LOCK_TIMEOUT;
439-
timeouts[0].type = TMPARAM_AT;
440-
timeouts[0].fin_time = ltime;
441-
enable_timeouts(timeouts, 1);
455+
got_standby_deadlock_timeout = false;
456+
timeouts[cnt].id = STANDBY_DEADLOCK_TIMEOUT;
457+
timeouts[cnt].type = TMPARAM_AFTER;
458+
timeouts[cnt].delay_ms = DeadlockTimeout;
459+
cnt++;
460+
461+
enable_timeouts(timeouts, cnt);
442462
}
443463

444464
/* Wait to be signaled by the release of the Relation Lock */
445465
ProcWaitForSignal(PG_WAIT_LOCK | locktag.locktag_type);
446466

467+
/*
468+
* Exit if ltime is reached. Then all the backends holding conflicting
469+
* locks will be canceled in the next ResolveRecoveryConflictWithLock()
470+
* call.
471+
*/
472+
if (got_standby_lock_timeout)
473+
goto cleanup;
474+
475+
if (got_standby_deadlock_timeout)
476+
{
477+
VirtualTransactionId *backends;
478+
479+
backends = GetLockConflicts(&locktag, AccessExclusiveLock, NULL);
480+
481+
/* Quick exit if there's no work to be done */
482+
if (!VirtualTransactionIdIsValid(*backends))
483+
goto cleanup;
484+
485+
/*
486+
* Send signals to all the backends holding the conflicting locks, to
487+
* ask them to check themselves for deadlocks.
488+
*/
489+
while (VirtualTransactionIdIsValid(*backends))
490+
{
491+
SignalVirtualTransaction(*backends,
492+
PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK,
493+
false);
494+
backends++;
495+
}
496+
497+
/*
498+
* Wait again here to be signaled by the release of the Relation Lock,
499+
* to prevent the subsequent RecoveryConflictWithLock() from causing
500+
* deadlock_timeout and sending a request for deadlocks check again.
501+
* Otherwise the request continues to be sent every deadlock_timeout
502+
* until the relation locks are released or ltime is reached.
503+
*/
504+
got_standby_deadlock_timeout = false;
505+
ProcWaitForSignal(PG_WAIT_LOCK | locktag.locktag_type);
506+
}
507+
508+
cleanup:
509+
447510
/*
448511
* Clear any timeout requests established above. We assume here that the
449512
* Startup process doesn't have any other outstanding timeouts than those
450513
* used by this function. If that stops being true, we could cancel the
451514
* timeouts individually, but that'd be slower.
452515
*/
453516
disable_all_timeouts(false);
517+
got_standby_lock_timeout = false;
518+
got_standby_deadlock_timeout = false;
454519
}
455520

456521
/*
@@ -489,15 +554,7 @@ ResolveRecoveryConflictWithBufferPin(void)
489554

490555
ltime = GetStandbyLimitTime();
491556

492-
if (ltime == 0)
493-
{
494-
/*
495-
* We're willing to wait forever for conflicts, so set timeout for
496-
* deadlock check only
497-
*/
498-
enable_timeout_after(STANDBY_DEADLOCK_TIMEOUT, DeadlockTimeout);
499-
}
500-
else if (GetCurrentTimestamp() >= ltime)
557+
if (GetCurrentTimestamp() >= ltime && ltime != 0)
501558
{
502559
/*
503560
* We're already behind, so clear a path as quickly as possible.
@@ -511,14 +568,23 @@ ResolveRecoveryConflictWithBufferPin(void)
511568
* waiting longer than deadlock_timeout
512569
*/
513570
EnableTimeoutParams timeouts[2];
571+
int cnt = 0;
514572

515-
timeouts[0].id = STANDBY_TIMEOUT;
516-
timeouts[0].type = TMPARAM_AT;
517-
timeouts[0].fin_time = ltime;
518-
timeouts[1].id = STANDBY_DEADLOCK_TIMEOUT;
519-
timeouts[1].type = TMPARAM_AFTER;
520-
timeouts[1].delay_ms = DeadlockTimeout;
521-
enable_timeouts(timeouts, 2);
573+
if (ltime != 0)
574+
{
575+
timeouts[cnt].id = STANDBY_TIMEOUT;
576+
timeouts[cnt].type = TMPARAM_AT;
577+
timeouts[cnt].fin_time = ltime;
578+
cnt++;
579+
}
580+
581+
got_standby_deadlock_timeout = false;
582+
timeouts[cnt].id = STANDBY_DEADLOCK_TIMEOUT;
583+
timeouts[cnt].type = TMPARAM_AFTER;
584+
timeouts[cnt].delay_ms = DeadlockTimeout;
585+
cnt++;
586+
587+
enable_timeouts(timeouts, cnt);
522588
}
523589

524590
/*
@@ -531,13 +597,33 @@ ResolveRecoveryConflictWithBufferPin(void)
531597
*/
532598
ProcWaitForSignal(PG_WAIT_BUFFER_PIN);
533599

600+
if (got_standby_deadlock_timeout)
601+
{
602+
/*
603+
* Send out a request for hot-standby backends to check themselves for
604+
* deadlocks.
605+
*
606+
* XXX The subsequent ResolveRecoveryConflictWithBufferPin() will wait
607+
* to be signaled by UnpinBuffer() again and send a request for
608+
* deadlocks check if deadlock_timeout happens. This causes the
609+
* request to continue to be sent every deadlock_timeout until the
610+
* buffer is unpinned or ltime is reached. This would increase the
611+
* workload in the startup process and backends. In practice it may
612+
* not be so harmful because the period that the buffer is kept pinned
613+
* is basically no so long. But we should fix this?
614+
*/
615+
SendRecoveryConflictWithBufferPin(
616+
PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK);
617+
}
618+
534619
/*
535620
* Clear any timeout requests established above. We assume here that the
536621
* Startup process doesn't have any other timeouts than what this function
537622
* uses. If that stops being true, we could cancel the timeouts
538623
* individually, but that'd be slower.
539624
*/
540625
disable_all_timeouts(false);
626+
got_standby_deadlock_timeout = false;
541627
}
542628

543629
static void
@@ -597,13 +683,12 @@ CheckRecoveryConflictDeadlock(void)
597683

598684
/*
599685
* StandbyDeadLockHandler() will be called if STANDBY_DEADLOCK_TIMEOUT
600-
* occurs before STANDBY_TIMEOUT. Send out a request for hot-standby
601-
* backends to check themselves for deadlocks.
686+
* occurs before STANDBY_TIMEOUT.
602687
*/
603688
void
604689
StandbyDeadLockHandler(void)
605690
{
606-
SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK);
691+
got_standby_deadlock_timeout = true;
607692
}
608693

609694
/*
@@ -622,11 +707,11 @@ StandbyTimeoutHandler(void)
622707

623708
/*
624709
* StandbyLockTimeoutHandler() will be called if STANDBY_LOCK_TIMEOUT is exceeded.
625-
* This doesn't need to do anything, simply waking up is enough.
626710
*/
627711
void
628712
StandbyLockTimeoutHandler(void)
629713
{
714+
got_standby_lock_timeout = true;
630715
}
631716

632717
/*

src/backend/storage/lmgr/proc.c

+3
Original file line numberDiff line numberDiff line change
@@ -1793,6 +1793,9 @@ CheckDeadLockAlert(void)
17931793
* Have to set the latch again, even if handle_sig_alarm already did. Back
17941794
* then got_deadlock_timeout wasn't yet set... It's unlikely that this
17951795
* ever would be a problem, but setting a set latch again is cheap.
1796+
*
1797+
* Note that, when this function runs inside procsignal_sigusr1_handler(),
1798+
* the handler function sets the latch again after the latch is set here.
17961799
*/
17971800
SetLatch(MyLatch);
17981801
errno = save_errno;

src/backend/tcop/postgres.c

+14-2
Original file line numberDiff line numberDiff line change
@@ -2950,11 +2950,23 @@ RecoveryConflictInterrupt(ProcSignalReason reason)
29502950
case PROCSIG_RECOVERY_CONFLICT_BUFFERPIN:
29512951

29522952
/*
2953-
* If we aren't blocking the Startup process there is nothing
2954-
* more to do.
2953+
* If PROCSIG_RECOVERY_CONFLICT_BUFFERPIN is requested but we
2954+
* aren't blocking the Startup process there is nothing more
2955+
* to do.
2956+
*
2957+
* When PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK is
2958+
* requested, if we're waiting for locks and the startup
2959+
* process is not waiting for buffer pin (i.e., also waiting
2960+
* for locks), we set the flag so that ProcSleep() will check
2961+
* for deadlocks.
29552962
*/
29562963
if (!HoldingBufferPinThatDelaysRecovery())
2964+
{
2965+
if (reason == PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK &&
2966+
GetStartupBufferPinWaitBufId() < 0)
2967+
CheckDeadLockAlert();
29572968
return;
2969+
}
29582970

29592971
MyProc->recoveryConflictPending = true;
29602972

src/include/storage/procarray.h

+2
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,8 @@ extern VirtualTransactionId *GetCurrentVirtualXIDs(TransactionId limitXmin,
7272
int *nvxids);
7373
extern VirtualTransactionId *GetConflictingVirtualXIDs(TransactionId limitXmin, Oid dbOid);
7474
extern pid_t CancelVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode);
75+
extern pid_t SignalVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode,
76+
bool conflictPending);
7577

7678
extern bool MinimumActiveBackends(int min);
7779
extern int CountDBBackends(Oid databaseid);

0 commit comments

Comments
 (0)