Skip to content

Commit 35953af

Browse files
Author: vshepard
Commit message: Add force node stopping using SIGKILL in case of unsuccessful pg_ctl stop
1 parent 5c7cf18 commit 35953af

File tree

2 files changed: 30 additions (+30) and 5 deletions (-5)

testgres/node.py

28 additions (+28), 3 deletions (-3)
Original file line number | Diff line number | Diff line change
@@ -330,23 +330,43 @@ def version(self):
330330
"""
331331
return self._pg_version
332332

333-
def _try_shutdown(self, max_attempts):
333+
def _try_shutdown(self, max_attempts, with_force=False):
334334
attempts = 0
335+
node_pid = self.pid
335336

336337
# try stopping server N times
337338
while attempts < max_attempts:
338339
try:
339-
self.stop()
340+
self.stop(wait=False)
340341
break # OK
341342
except ExecUtilException:
342343
pass # one more time
343344
except Exception:
344-
# TODO: probably should kill stray instance
345345
eprint('cannot stop node {}'.format(self.name))
346346
break
347347

348348
attempts += 1
349349

350+
# If force stopping is enabled and PID is valid
351+
if with_force and node_pid !=0:
352+
# If we couldn't stop the node
353+
p_status_output = self.os_ops.exec_command(cmd=f'ps -p {node_pid}', shell=True).decode('utf-8')
354+
if self.status() != NodeStatus.Stopped and p_status_output and str(node_pid) in p_status_output:
355+
try:
356+
eprint(f'Force stopping node {self.name} with PID {node_pid}')
357+
self.os_ops.kill(node_pid, signal.SIGKILL, expect_error=False)
358+
except Exception:
359+
# The node has already stopped
360+
pass
361+
362+
# Check that node stopped
363+
p_status_output = self.os_ops.exec_command(f'ps -p {node_pid}', shell=True, expect_error=True).decode('utf-8')
364+
if p_status_output and str(node_pid) in p_status_output:
365+
eprint(f'Failed to stop node {self.name}.')
366+
else:
367+
eprint(f'Node {self.name} has been stopped successfully.')
368+
369+
350370
def _assign_master(self, master):
351371
"""NOTE: this is a private method!"""
352372

@@ -935,6 +955,11 @@ def cleanup(self, max_attempts=3, full=False):
935955
rm_dir = self.data_dir # just data, save logs
936956

937957
self.os_ops.rmdirs(rm_dir, ignore_errors=False)
958+
is_not_removed = self.os_ops.isdir(rm_dir)
959+
while is_not_removed:
960+
time.sleep(1)
961+
self.os_ops.rmdirs(rm_dir, ignore_errors=False)
962+
is_not_removed = self.os_ops.isdir(rm_dir)
938963

939964
return self
940965

testgres/operations/local_ops.py

2 additions (+2), 2 deletions (-2)
Original file line number | Diff line number | Diff line change
@@ -293,10 +293,10 @@ def remove_file(self, filename):
293293
return os.remove(filename)
294294

295295
# Processes control
296-
def kill(self, pid, signal):
296+
def kill(self, pid, signal, expect_error=False):
297297
# Kill the process
298298
cmd = "kill -{} {}".format(signal, pid)
299-
return self.exec_command(cmd)
299+
return self.exec_command(cmd, expect_error=expect_error)
300300

301301
def get_pid(self):
302302
# Get current process id

0 commit comments

Comments
 (0)