Skip to content

Commit 22826e0

Browse files
PostgresNode::pid is improved (#199)
* PostgresNode::pid is improved - We do multiple attempts to read pid file. - We process a case when we see that node is stopped between test and read. - We process a case when pid-file is empty. * PostgresNode::pid is updated Assert is added. * execute_utility2 is updated (ignore_errors) - New parameters "ignore_errors" is added. Default value is False. - Asserts are added. * PostgresNode::_try_shutdown is rewrited (normalization) * PostgresNode::pid uses the data from "pg_ctl status" output. * PostgresNode::_try_shutdown is correct (return None) This method returns nothing (None).
1 parent de432ed commit 22826e0

File tree

2 files changed

+211
-40
lines changed

2 files changed

+211
-40
lines changed

testgres/consts.py

+4
Original file line numberDiff line numberDiff line change
@@ -35,3 +35,7 @@
3535

3636
# logical replication settings
3737
LOGICAL_REPL_MAX_CATCHUP_ATTEMPTS = 60
38+
39+
PG_CTL__STATUS__OK = 0
40+
PG_CTL__STATUS__NODE_IS_STOPPED = 3
41+
PG_CTL__STATUS__BAD_DATADIR = 4

testgres/node.py

+207-40
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,9 @@
4949
RECOVERY_CONF_FILE, \
5050
PG_LOG_FILE, \
5151
UTILS_LOG_FILE, \
52-
PG_PID_FILE
52+
PG_CTL__STATUS__OK, \
53+
PG_CTL__STATUS__NODE_IS_STOPPED, \
54+
PG_CTL__STATUS__BAD_DATADIR \
5355

5456
from .consts import \
5557
MAX_LOGICAL_REPLICATION_WORKERS, \
@@ -208,14 +210,136 @@ def pid(self):
208210
Return postmaster's PID if node is running, else 0.
209211
"""
210212

211-
if self.status():
212-
pid_file = os.path.join(self.data_dir, PG_PID_FILE)
213-
lines = self.os_ops.readlines(pid_file)
214-
pid = int(lines[0]) if lines else None
215-
return pid
213+
self__data_dir = self.data_dir
216214

217-
# for clarity
218-
return 0
215+
_params = [
216+
self._get_bin_path('pg_ctl'),
217+
"-D", self__data_dir,
218+
"status"
219+
] # yapf: disable
220+
221+
status_code, out, error = execute_utility2(
222+
self.os_ops,
223+
_params,
224+
self.utils_log_file,
225+
verbose=True,
226+
ignore_errors=True)
227+
228+
assert type(status_code) == int # noqa: E721
229+
assert type(out) == str # noqa: E721
230+
assert type(error) == str # noqa: E721
231+
232+
# -----------------
233+
if status_code == PG_CTL__STATUS__NODE_IS_STOPPED:
234+
return 0
235+
236+
# -----------------
237+
if status_code == PG_CTL__STATUS__BAD_DATADIR:
238+
return 0
239+
240+
# -----------------
241+
if status_code != PG_CTL__STATUS__OK:
242+
errMsg = "Getting of a node status [data_dir is {0}] failed.".format(self__data_dir)
243+
244+
raise ExecUtilException(
245+
message=errMsg,
246+
command=_params,
247+
exit_code=status_code,
248+
out=out,
249+
error=error,
250+
)
251+
252+
# -----------------
253+
assert status_code == PG_CTL__STATUS__OK
254+
255+
if out == "":
256+
__class__._throw_error__pg_ctl_returns_an_empty_string(
257+
_params
258+
)
259+
260+
C_PID_PREFIX = "(PID: "
261+
262+
i = out.find(C_PID_PREFIX)
263+
264+
if i == -1:
265+
__class__._throw_error__pg_ctl_returns_an_unexpected_string(
266+
out,
267+
_params
268+
)
269+
270+
assert i > 0
271+
assert i < len(out)
272+
assert len(C_PID_PREFIX) <= len(out)
273+
assert i <= len(out) - len(C_PID_PREFIX)
274+
275+
i += len(C_PID_PREFIX)
276+
start_pid_s = i
277+
278+
while True:
279+
if i == len(out):
280+
__class__._throw_error__pg_ctl_returns_an_unexpected_string(
281+
out,
282+
_params
283+
)
284+
285+
ch = out[i]
286+
287+
if ch == ")":
288+
break
289+
290+
if ch.isdigit():
291+
i += 1
292+
continue
293+
294+
__class__._throw_error__pg_ctl_returns_an_unexpected_string(
295+
out,
296+
_params
297+
)
298+
assert False
299+
300+
if i == start_pid_s:
301+
__class__._throw_error__pg_ctl_returns_an_unexpected_string(
302+
out,
303+
_params
304+
)
305+
306+
# TODO: Let's verify a length of pid string.
307+
308+
pid = int(out[start_pid_s:i])
309+
310+
if pid == 0:
311+
__class__._throw_error__pg_ctl_returns_a_zero_pid(
312+
out,
313+
_params
314+
)
315+
316+
assert pid != 0
317+
return pid
318+
319+
@staticmethod
320+
def _throw_error__pg_ctl_returns_an_empty_string(_params):
321+
errLines = []
322+
errLines.append("Utility pg_ctl returns empty string.")
323+
errLines.append("Command line is {0}".format(_params))
324+
raise RuntimeError("\n".join(errLines))
325+
326+
@staticmethod
327+
def _throw_error__pg_ctl_returns_an_unexpected_string(out, _params):
328+
errLines = []
329+
errLines.append("Utility pg_ctl returns an unexpected string:")
330+
errLines.append(out)
331+
errLines.append("------------")
332+
errLines.append("Command line is {0}".format(_params))
333+
raise RuntimeError("\n".join(errLines))
334+
335+
@staticmethod
336+
def _throw_error__pg_ctl_returns_a_zero_pid(out, _params):
337+
errLines = []
338+
errLines.append("Utility pg_ctl returns a zero pid. Output string is:")
339+
errLines.append(out)
340+
errLines.append("------------")
341+
errLines.append("Command line is {0}".format(_params))
342+
raise RuntimeError("\n".join(errLines))
219343

220344
@property
221345
def auxiliary_pids(self):
@@ -338,41 +462,84 @@ def version(self):
338462
return self._pg_version
339463

340464
def _try_shutdown(self, max_attempts, with_force=False):
465+
assert type(max_attempts) == int # noqa: E721
466+
assert type(with_force) == bool # noqa: E721
467+
assert max_attempts > 0
468+
341469
attempts = 0
470+
471+
# try stopping server N times
472+
while attempts < max_attempts:
473+
attempts += 1
474+
try:
475+
self.stop()
476+
except ExecUtilException:
477+
continue # one more time
478+
except Exception:
479+
eprint('cannot stop node {}'.format(self.name))
480+
break
481+
482+
return # OK
483+
484+
# If force stopping is enabled and PID is valid
485+
if not with_force:
486+
return
487+
342488
node_pid = self.pid
489+
assert node_pid is not None
490+
assert type(node_pid) == int # noqa: E721
343491

344-
if node_pid > 0:
345-
# try stopping server N times
346-
while attempts < max_attempts:
347-
try:
348-
self.stop()
349-
break # OK
350-
except ExecUtilException:
351-
pass # one more time
352-
except Exception:
353-
eprint('cannot stop node {}'.format(self.name))
354-
break
355-
356-
attempts += 1
357-
358-
# If force stopping is enabled and PID is valid
359-
if with_force and node_pid != 0:
360-
# If we couldn't stop the node
361-
p_status_output = self.os_ops.exec_command(cmd=f'ps -o pid= -p {node_pid}', shell=True, ignore_errors=True).decode('utf-8')
362-
if self.status() != NodeStatus.Stopped and p_status_output and str(node_pid) in p_status_output:
363-
try:
364-
eprint(f'Force stopping node {self.name} with PID {node_pid}')
365-
self.os_ops.kill(node_pid, signal.SIGKILL, expect_error=False)
366-
except Exception:
367-
# The node has already stopped
368-
pass
369-
370-
# Check that node stopped - print only column pid without headers
371-
p_status_output = self.os_ops.exec_command(f'ps -o pid= -p {node_pid}', shell=True, ignore_errors=True).decode('utf-8')
372-
if p_status_output and str(node_pid) in p_status_output:
373-
eprint(f'Failed to stop node {self.name}.')
374-
else:
375-
eprint(f'Node {self.name} has been stopped successfully.')
492+
if node_pid == 0:
493+
return
494+
495+
# TODO: [2025-02-28] It is really the old ugly code. We have to rewrite it!
496+
497+
ps_command = ['ps', '-o', 'pid=', '-p', str(node_pid)]
498+
499+
ps_output = self.os_ops.exec_command(cmd=ps_command, shell=True, ignore_errors=True).decode('utf-8')
500+
assert type(ps_output) == str # noqa: E721
501+
502+
if ps_output == "":
503+
return
504+
505+
if ps_output != str(node_pid):
506+
__class__._throw_bugcheck__unexpected_result_of_ps(
507+
ps_output,
508+
ps_command)
509+
510+
try:
511+
eprint('Force stopping node {0} with PID {1}'.format(self.name, node_pid))
512+
self.os_ops.kill(node_pid, signal.SIGKILL, expect_error=False)
513+
except Exception:
514+
# The node has already stopped
515+
pass
516+
517+
# Check that node stopped - print only column pid without headers
518+
ps_output = self.os_ops.exec_command(cmd=ps_command, shell=True, ignore_errors=True).decode('utf-8')
519+
assert type(ps_output) == str # noqa: E721
520+
521+
if ps_output == "":
522+
eprint('Node {0} has been stopped successfully.'.format(self.name))
523+
return
524+
525+
if ps_output == str(node_pid):
526+
eprint('Failed to stop node {0}.'.format(self.name))
527+
return
528+
529+
__class__._throw_bugcheck__unexpected_result_of_ps(
530+
ps_output,
531+
ps_command)
532+
533+
@staticmethod
534+
def _throw_bugcheck__unexpected_result_of_ps(result, cmd):
535+
assert type(result) == str # noqa: E721
536+
assert type(cmd) == list # noqa: E721
537+
errLines = []
538+
errLines.append("[BUG CHECK] Unexpected result of command ps:")
539+
errLines.append(result)
540+
errLines.append("-----")
541+
errLines.append("Command line is {0}".format(cmd))
542+
raise RuntimeError("\n".join(errLines))
376543

377544
def _assign_master(self, master):
378545
"""NOTE: this is a private method!"""

0 commit comments

Comments
 (0)