From 914d6e3868fad1728d1881bbcb1cf645ef0aa3db Mon Sep 17 00:00:00 2001 From: vshepard Date: Mon, 29 Jul 2024 15:42:08 +0200 Subject: [PATCH] Add force node stopping using SIGKILL in case of unsuccessful pg_ctl stop --- testgres/node.py | 23 +++++++++++++++++++++-- testgres/operations/local_ops.py | 4 ++-- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/testgres/node.py b/testgres/node.py index 2ea49529..8b30476f 100644 --- a/testgres/node.py +++ b/testgres/node.py @@ -330,8 +330,9 @@ def version(self): """ return self._pg_version - def _try_shutdown(self, max_attempts): + def _try_shutdown(self, max_attempts, with_force=False): attempts = 0 + node_pid = self.pid # try stopping server N times while attempts < max_attempts: @@ -341,12 +342,30 @@ def _try_shutdown(self, max_attempts): except ExecUtilException: pass # one more time except Exception: - # TODO: probably should kill stray instance eprint('cannot stop node {}'.format(self.name)) break attempts += 1 + # If force stopping is enabled and PID is valid + if with_force and node_pid != 0: + # If we couldn't stop the node + p_status_output = self.os_ops.exec_command(cmd=f'ps -p {node_pid}', shell=True).decode('utf-8') + if self.status() != NodeStatus.Stopped and p_status_output and str(node_pid) in p_status_output: + try: + eprint(f'Force stopping node {self.name} with PID {node_pid}') + self.os_ops.kill(node_pid, signal.SIGKILL, expect_error=False) + except Exception: + # The node has already stopped + pass + + # Check that node stopped + p_status_output = self.os_ops.exec_command(f'ps -p {node_pid}', shell=True, expect_error=True).decode('utf-8') + if p_status_output and str(node_pid) in p_status_output: + eprint(f'Failed to stop node {self.name}.') + else: + eprint(f'Node {self.name} has been stopped successfully.') + def _assign_master(self, master): """NOTE: this is a private method!""" diff --git a/testgres/operations/local_ops.py b/testgres/operations/local_ops.py index b518a6cb..3d9e490e 100644 --- a/testgres/operations/local_ops.py +++ b/testgres/operations/local_ops.py @@ -293,10 +293,10 @@ def remove_file(self, filename): return os.remove(filename) # Processes control - def kill(self, pid, signal): + def kill(self, pid, signal, expect_error=False): # Kill the process cmd = "kill -{} {}".format(signal, pid) - return self.exec_command(cmd) + return self.exec_command(cmd, expect_error=expect_error) def get_pid(self): # Get current process id