|
49 | 49 | RECOVERY_CONF_FILE, \
|
50 | 50 | PG_LOG_FILE, \
|
51 | 51 | UTILS_LOG_FILE, \
|
52 |
| - PG_PID_FILE |
| 52 | + PG_CTL__STATUS__OK, \ |
| 53 | + PG_CTL__STATUS__NODE_IS_STOPPED, \ |
| 54 | + PG_CTL__STATUS__BAD_DATADIR \ |
53 | 55 |
|
54 | 56 | from .consts import \
|
55 | 57 | MAX_LOGICAL_REPLICATION_WORKERS, \
|
@@ -208,14 +210,136 @@ def pid(self):
|
208 | 210 | Return postmaster's PID if node is running, else 0.
|
209 | 211 | """
|
210 | 212 |
|
211 |
| - if self.status(): |
212 |
| - pid_file = os.path.join(self.data_dir, PG_PID_FILE) |
213 |
| - lines = self.os_ops.readlines(pid_file) |
214 |
| - pid = int(lines[0]) if lines else None |
215 |
| - return pid |
| 213 | + self__data_dir = self.data_dir |
216 | 214 |
|
217 |
| - # for clarity |
218 |
| - return 0 |
| 215 | + _params = [ |
| 216 | + self._get_bin_path('pg_ctl'), |
| 217 | + "-D", self__data_dir, |
| 218 | + "status" |
| 219 | + ] # yapf: disable |
| 220 | + |
| 221 | + status_code, out, error = execute_utility2( |
| 222 | + self.os_ops, |
| 223 | + _params, |
| 224 | + self.utils_log_file, |
| 225 | + verbose=True, |
| 226 | + ignore_errors=True) |
| 227 | + |
| 228 | + assert type(status_code) == int # noqa: E721 |
| 229 | + assert type(out) == str # noqa: E721 |
| 230 | + assert type(error) == str # noqa: E721 |
| 231 | + |
| 232 | + # ----------------- |
| 233 | + if status_code == PG_CTL__STATUS__NODE_IS_STOPPED: |
| 234 | + return 0 |
| 235 | + |
| 236 | + # ----------------- |
| 237 | + if status_code == PG_CTL__STATUS__BAD_DATADIR: |
| 238 | + return 0 |
| 239 | + |
| 240 | + # ----------------- |
| 241 | + if status_code != PG_CTL__STATUS__OK: |
| 242 | + errMsg = "Getting of a node status [data_dir is {0}] failed.".format(self__data_dir) |
| 243 | + |
| 244 | + raise ExecUtilException( |
| 245 | + message=errMsg, |
| 246 | + command=_params, |
| 247 | + exit_code=status_code, |
| 248 | + out=out, |
| 249 | + error=error, |
| 250 | + ) |
| 251 | + |
| 252 | + # ----------------- |
| 253 | + assert status_code == PG_CTL__STATUS__OK |
| 254 | + |
| 255 | + if out == "": |
| 256 | + __class__._throw_error__pg_ctl_returns_an_empty_string( |
| 257 | + _params |
| 258 | + ) |
| 259 | + |
| 260 | + C_PID_PREFIX = "(PID: " |
| 261 | + |
| 262 | + i = out.find(C_PID_PREFIX) |
| 263 | + |
| 264 | + if i == -1: |
| 265 | + __class__._throw_error__pg_ctl_returns_an_unexpected_string( |
| 266 | + out, |
| 267 | + _params |
| 268 | + ) |
| 269 | + |
| 270 | + assert i > 0 |
| 271 | + assert i < len(out) |
| 272 | + assert len(C_PID_PREFIX) <= len(out) |
| 273 | + assert i <= len(out) - len(C_PID_PREFIX) |
| 274 | + |
| 275 | + i += len(C_PID_PREFIX) |
| 276 | + start_pid_s = i |
| 277 | + |
| 278 | + while True: |
| 279 | + if i == len(out): |
| 280 | + __class__._throw_error__pg_ctl_returns_an_unexpected_string( |
| 281 | + out, |
| 282 | + _params |
| 283 | + ) |
| 284 | + |
| 285 | + ch = out[i] |
| 286 | + |
| 287 | + if ch == ")": |
| 288 | + break |
| 289 | + |
| 290 | + if ch.isdigit(): |
| 291 | + i += 1 |
| 292 | + continue |
| 293 | + |
| 294 | + __class__._throw_error__pg_ctl_returns_an_unexpected_string( |
| 295 | + out, |
| 296 | + _params |
| 297 | + ) |
| 298 | + assert False |
| 299 | + |
| 300 | + if i == start_pid_s: |
| 301 | + __class__._throw_error__pg_ctl_returns_an_unexpected_string( |
| 302 | + out, |
| 303 | + _params |
| 304 | + ) |
| 305 | + |
| 306 | + # TODO: Let's verify a length of pid string. |
| 307 | + |
| 308 | + pid = int(out[start_pid_s:i]) |
| 309 | + |
| 310 | + if pid == 0: |
| 311 | + __class__._throw_error__pg_ctl_returns_a_zero_pid( |
| 312 | + out, |
| 313 | + _params |
| 314 | + ) |
| 315 | + |
| 316 | + assert pid != 0 |
| 317 | + return pid |
| 318 | + |
| 319 | + @staticmethod |
| 320 | + def _throw_error__pg_ctl_returns_an_empty_string(_params): |
| 321 | + errLines = [] |
| 322 | + errLines.append("Utility pg_ctl returns empty string.") |
| 323 | + errLines.append("Command line is {0}".format(_params)) |
| 324 | + raise RuntimeError("\n".join(errLines)) |
| 325 | + |
| 326 | + @staticmethod |
| 327 | + def _throw_error__pg_ctl_returns_an_unexpected_string(out, _params): |
| 328 | + errLines = [] |
| 329 | + errLines.append("Utility pg_ctl returns an unexpected string:") |
| 330 | + errLines.append(out) |
| 331 | + errLines.append("------------") |
| 332 | + errLines.append("Command line is {0}".format(_params)) |
| 333 | + raise RuntimeError("\n".join(errLines)) |
| 334 | + |
| 335 | + @staticmethod |
| 336 | + def _throw_error__pg_ctl_returns_a_zero_pid(out, _params): |
| 337 | + errLines = [] |
| 338 | + errLines.append("Utility pg_ctl returns a zero pid. Output string is:") |
| 339 | + errLines.append(out) |
| 340 | + errLines.append("------------") |
| 341 | + errLines.append("Command line is {0}".format(_params)) |
| 342 | + raise RuntimeError("\n".join(errLines)) |
219 | 343 |
|
220 | 344 | @property
|
221 | 345 | def auxiliary_pids(self):
|
@@ -338,41 +462,84 @@ def version(self):
|
338 | 462 | return self._pg_version
|
339 | 463 |
|
340 | 464 | def _try_shutdown(self, max_attempts, with_force=False):
|
| 465 | + assert type(max_attempts) == int # noqa: E721 |
| 466 | + assert type(with_force) == bool # noqa: E721 |
| 467 | + assert max_attempts > 0 |
| 468 | + |
341 | 469 | attempts = 0
|
| 470 | + |
| 471 | + # try stopping server N times |
| 472 | + while attempts < max_attempts: |
| 473 | + attempts += 1 |
| 474 | + try: |
| 475 | + self.stop() |
| 476 | + except ExecUtilException: |
| 477 | + continue # one more time |
| 478 | + except Exception: |
| 479 | + eprint('cannot stop node {}'.format(self.name)) |
| 480 | + break |
| 481 | + |
| 482 | + return # OK |
| 483 | + |
| 484 | + # If force stopping is enabled and PID is valid |
| 485 | + if not with_force: |
| 486 | + return |
| 487 | + |
342 | 488 | node_pid = self.pid
|
| 489 | + assert node_pid is not None |
| 490 | + assert type(node_pid) == int # noqa: E721 |
343 | 491 |
|
344 |
| - if node_pid > 0: |
345 |
| - # try stopping server N times |
346 |
| - while attempts < max_attempts: |
347 |
| - try: |
348 |
| - self.stop() |
349 |
| - break # OK |
350 |
| - except ExecUtilException: |
351 |
| - pass # one more time |
352 |
| - except Exception: |
353 |
| - eprint('cannot stop node {}'.format(self.name)) |
354 |
| - break |
355 |
| - |
356 |
| - attempts += 1 |
357 |
| - |
358 |
| - # If force stopping is enabled and PID is valid |
359 |
| - if with_force and node_pid != 0: |
360 |
| - # If we couldn't stop the node |
361 |
| - p_status_output = self.os_ops.exec_command(cmd=f'ps -o pid= -p {node_pid}', shell=True, ignore_errors=True).decode('utf-8') |
362 |
| - if self.status() != NodeStatus.Stopped and p_status_output and str(node_pid) in p_status_output: |
363 |
| - try: |
364 |
| - eprint(f'Force stopping node {self.name} with PID {node_pid}') |
365 |
| - self.os_ops.kill(node_pid, signal.SIGKILL, expect_error=False) |
366 |
| - except Exception: |
367 |
| - # The node has already stopped |
368 |
| - pass |
369 |
| - |
370 |
| - # Check that node stopped - print only column pid without headers |
371 |
| - p_status_output = self.os_ops.exec_command(f'ps -o pid= -p {node_pid}', shell=True, ignore_errors=True).decode('utf-8') |
372 |
| - if p_status_output and str(node_pid) in p_status_output: |
373 |
| - eprint(f'Failed to stop node {self.name}.') |
374 |
| - else: |
375 |
| - eprint(f'Node {self.name} has been stopped successfully.') |
| 492 | + if node_pid == 0: |
| 493 | + return |
| 494 | + |
| 495 | + # TODO: [2025-02-28] It is really the old ugly code. We have to rewrite it! |
| 496 | + |
| 497 | + ps_command = ['ps', '-o', 'pid=', '-p', str(node_pid)] |
| 498 | + |
| 499 | + ps_output = self.os_ops.exec_command(cmd=ps_command, shell=True, ignore_errors=True).decode('utf-8') |
| 500 | + assert type(ps_output) == str # noqa: E721 |
| 501 | + |
| 502 | + if ps_output == "": |
| 503 | + return |
| 504 | + |
| 505 | + if ps_output != str(node_pid): |
| 506 | + __class__._throw_bugcheck__unexpected_result_of_ps( |
| 507 | + ps_output, |
| 508 | + ps_command) |
| 509 | + |
| 510 | + try: |
| 511 | + eprint('Force stopping node {0} with PID {1}'.format(self.name, node_pid)) |
| 512 | + self.os_ops.kill(node_pid, signal.SIGKILL, expect_error=False) |
| 513 | + except Exception: |
| 514 | + # The node has already stopped |
| 515 | + pass |
| 516 | + |
| 517 | + # Check that node stopped - print only column pid without headers |
| 518 | + ps_output = self.os_ops.exec_command(cmd=ps_command, shell=True, ignore_errors=True).decode('utf-8') |
| 519 | + assert type(ps_output) == str # noqa: E721 |
| 520 | + |
| 521 | + if ps_output == "": |
| 522 | + eprint('Node {0} has been stopped successfully.'.format(self.name)) |
| 523 | + return |
| 524 | + |
| 525 | + if ps_output == str(node_pid): |
| 526 | + eprint('Failed to stop node {0}.'.format(self.name)) |
| 527 | + return |
| 528 | + |
| 529 | + __class__._throw_bugcheck__unexpected_result_of_ps( |
| 530 | + ps_output, |
| 531 | + ps_command) |
| 532 | + |
| 533 | + @staticmethod |
| 534 | + def _throw_bugcheck__unexpected_result_of_ps(result, cmd): |
| 535 | + assert type(result) == str # noqa: E721 |
| 536 | + assert type(cmd) == list # noqa: E721 |
| 537 | + errLines = [] |
| 538 | + errLines.append("[BUG CHECK] Unexpected result of command ps:") |
| 539 | + errLines.append(result) |
| 540 | + errLines.append("-----") |
| 541 | + errLines.append("Command line is {0}".format(cmd)) |
| 542 | + raise RuntimeError("\n".join(errLines)) |
376 | 543 |
|
377 | 544 | def _assign_master(self, master):
|
378 | 545 | """NOTE: this is a private method!"""
|
|
0 commit comments