Skip to content

Commit 74cd21a

Browse files
committed
apply lubennikovaav review.patch
1 parent bee476c commit 74cd21a

File tree

2 files changed

+50
-0
lines changed

2 files changed

+50
-0
lines changed

src/catchup.c

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,11 +52,16 @@ do_catchup(const char *source_pgdata, const char *dest_pgdata, int num_threads)
5252
do_catchup_instance(source_pgdata, dest_pgdata, source_conn, &source_node_info,
5353
no_sync, backup_logs, dest_pgdata_is_empty);
5454

55+
//REVIEW: Are we going to do that before release?
5556
/* TODO: show the amount of transfered data in bytes and calculate incremental ratio */
5657

5758
return 0;
5859
}
5960

61+
//REVIEW Please add a comment to this function.
62+
//Besides, the name of this function looks strange to me.
63+
//Maybe catchup_init_state() or catchup_setup() will do better?
64+
//I'd also suggest to wrap all these fields into some CatchupState, but it isn't urgent.
6065
static PGconn *
6166
catchup_collect_info(PGNodeInfo *source_node_info, const char *source_pgdata, const char *dest_pgdata)
6267
{
@@ -70,12 +75,14 @@ catchup_collect_info(PGNodeInfo *source_node_info, const char *source_pgdata, co
7075
current.start_time = time(NULL);
7176

7277
StrNCpy(current.program_version, PROGRAM_VERSION, sizeof(current.program_version));
78+
//REVIEW I guess these are some copy-paste leftovers. Let's clean them.
7379
//current.compress_alg = instance_config.compress_alg;
7480
//current.compress_level = instance_config.compress_level;
7581

7682
/* Do some compatibility checks and fill basic info about PG instance */
7783
source_conn = pgdata_basic_setup(instance_config.conn_opt, source_node_info);
7884

85+
//REVIEW Please adjust the comment. Do we need this code for catchup at all?
7986
/* below perform checks specific for backup command */
8087
#if PG_VERSION_NUM >= 110000
8188
if (!RetrieveWalSegSize(source_conn))
@@ -106,10 +113,12 @@ catchup_collect_info(PGNodeInfo *source_node_info, const char *source_pgdata, co
106113
return source_conn;
107114
}
108115

116+
//REVIEW Please add a comment to this function.
109117
static void
110118
catchup_preflight_checks(PGNodeInfo *source_node_info, PGconn *source_conn,
111119
const char *source_pgdata, const char *dest_pgdata, bool dest_pgdata_is_empty)
112120
{
121+
//REVIEW Let's fix it before release.
113122
// TODO: add sanity check that source PGDATA is not empty
114123

115124
/* Check that connected PG instance and source PGDATA are the same */
@@ -135,6 +144,7 @@ catchup_preflight_checks(PGNodeInfo *source_node_info, PGconn *source_conn,
135144
if (current.from_replica && exclusive_backup)
136145
elog(ERROR, "Catchup from standby is available only for PG >= 9.6");
137146

147+
//REVIEW FIXME Let's fix it before release. This one seems like a potential bug.
138148
// TODO check if it is local catchup and source contain tablespaces
139149
}
140150

@@ -154,15 +164,18 @@ do_catchup_instance(const char *source_pgdata, const char *dest_pgdata, PGconn *
154164
RedoParams dest_redo = { 0, InvalidXLogRecPtr, 0 };
155165
pgFile *source_pg_control_file = NULL;
156166

167+
//REVIEW please adjust this comment.
157168
/* arrays with meta info for multi threaded backup */
158169
pthread_t *threads;
159170
catchup_thread_runner_arg *threads_args;
160171
bool catchup_isok = true;
161172

162173
parray *source_filelist = NULL;
163174
parray *dest_filelist = NULL;
175+
//REVIEW We don't handle external_dirs in catchup, do we? Let's clean this up.
164176
parray *external_dirs = NULL;
165177

178+
//REVIEW FIXME Let's fix it before release. It can cause some obscure bugs.
166179
/* TODO: in case of timeline mistmatch, check that source PG timeline descending from dest PG timeline */
167180
parray *tli_list = NULL;
168181

@@ -172,6 +185,8 @@ do_catchup_instance(const char *source_pgdata, const char *dest_pgdata, PGconn *
172185
char pretty_bytes[20];
173186

174187
PGStopBackupResult stop_backup_result;
188+
//REVIEW Is it relevant to catchup? I suppose it isn't, since catchup is a new code.
189+
//If we do need it, please write a comment explaining that.
175190
/* kludge against some old bug in archive_timeout. TODO: remove in 3.0.0 */
176191
int timeout = (instance_config.archive_timeout > 0) ?
177192
instance_config.archive_timeout : ARCHIVE_TIMEOUT_DEFAULT;
@@ -184,10 +199,14 @@ do_catchup_instance(const char *source_pgdata, const char *dest_pgdata, PGconn *
184199
strncat(label, " with pg_probackup", lengthof(label) -
185200
strlen(" with pg_probackup"));
186201

202+
//REVIEW FIXME Let' do that.
203+
187204
/* Call pg_start_backup function in PostgreSQL connect */
188205
pg_start_backup(label, smooth_checkpoint, &current, source_node_info, source_conn);
189206
elog(LOG, "pg_start_backup START LSN %X/%X", (uint32) (current.start_lsn >> 32), (uint32) (current.start_lsn));
190207

208+
//REVIEW I wonder, if we can move this piece above and call before pg_start backup()?
209+
//It seems to be a part of setup phase.
191210
if (!dest_pgdata_is_empty &&
192211
(current.backup_mode == BACKUP_MODE_DIFF_PTRACK ||
193212
current.backup_mode == BACKUP_MODE_DIFF_DELTA))
@@ -201,6 +220,8 @@ do_catchup_instance(const char *source_pgdata, const char *dest_pgdata, PGconn *
201220
elog(INFO, "syncLSN = %X/%X", (uint32) (dest_redo.lsn >> 32), (uint32) dest_redo.lsn);
202221
}
203222

223+
//REVIEW I wonder, if we can move this piece above and call before pg_start backup()?
224+
//It seems to be a part of setup phase.
204225
/*
205226
* TODO: move to separate function to use in both backup.c and catchup.c
206227
*/
@@ -234,6 +255,7 @@ do_catchup_instance(const char *source_pgdata, const char *dest_pgdata, PGconn *
234255
current.start_lsn, current.tli);
235256
}
236257

258+
//REVIEW please adjust the comment.
237259
/* initialize backup list */
238260
source_filelist = parray_new();
239261

@@ -244,12 +266,15 @@ do_catchup_instance(const char *source_pgdata, const char *dest_pgdata, PGconn *
244266
else
245267
dir_list_file(source_filelist, source_pgdata,
246268
true, true, false, backup_logs, true, 0, FIO_LOCAL_HOST);
269+
270+
//REVIEW FIXME. Let's fix that before release.
247271
// TODO filter pg_xlog/wal?
248272
// TODO what if wal is not a dir (symlink to a dir)?
249273

250274
/* close ssh session in main thread */
251275
fio_disconnect();
252276

277+
//REVIEW Do we want to do similar calculation for dest?
253278
current.pgdata_bytes += calculate_datasize_of_filelist(source_filelist);
254279
pretty_size(current.pgdata_bytes, pretty_bytes, lengthof(pretty_bytes));
255280
elog(INFO, "Source PGDATA size: %s", pretty_bytes);
@@ -267,12 +292,14 @@ do_catchup_instance(const char *source_pgdata, const char *dest_pgdata, PGconn *
267292
*/
268293
parray_qsort(source_filelist, pgFileCompareRelPathWithExternal);
269294

295+
//REVIEW Please adjust the comment.
270296
/* Extract information about files in source_filelist parsing their names:*/
271297
parse_filelist_filenames(source_filelist, source_pgdata);
272298

273299
elog(LOG, "Start LSN (source): %X/%X, TLI: %X",
274300
(uint32) (current.start_lsn >> 32), (uint32) (current.start_lsn),
275301
current.tli);
302+
//REVIEW FIXME Huh? Don't we check TLI at all?
276303
/* TODO проверить, нужна ли проверка TLI */
277304
if (current.backup_mode != BACKUP_MODE_FULL)
278305
elog(LOG, "LSN in destination: %X/%X, TLI: %X",
@@ -339,11 +366,14 @@ do_catchup_instance(const char *source_pgdata, const char *dest_pgdata, PGconn *
339366
char to_path[MAXPGPATH];
340367

341368
// perform additional check that this is actually synlink?
369+
//REVIEW Why is this code block separated?
342370
{ /* get full symlink path and map this path to new location */
343371
char source_full_path[MAXPGPATH];
344372
char symlink_content[MAXPGPATH];
345373
join_path_components(source_full_path, source_pgdata, file->rel_path);
346374
fio_readlink(source_full_path, symlink_content, sizeof(symlink_content), FIO_DB_HOST);
375+
//REVIEW What if we won't find mapping for this tablespace?
376+
//I'd expect a failure. Otherwise, we may spoil source database data.
347377
linked_path = leaked_abstraction_get_tablespace_mapping(symlink_content);
348378
// TODO: check that linked_path != symlink_content in case of local catchup?
349379
elog(WARNING, "Map tablespace full_path: \"%s\" old_symlink_content: \"%s\" old_symlink_content: \"%s\"\n",
@@ -361,6 +391,8 @@ do_catchup_instance(const char *source_pgdata, const char *dest_pgdata, PGconn *
361391
elog(VERBOSE, "Create directory \"%s\" and symbolic link \"%s\"",
362392
linked_path, to_path);
363393

394+
//REVIEW Handle return value here.
395+
//We should not proceed if failed to create dir.
364396
/* create tablespace directory */
365397
fio_mkdir(linked_path, DIR_PERMISSION, FIO_BACKUP_HOST);
366398

@@ -403,6 +435,8 @@ do_catchup_instance(const char *source_pgdata, const char *dest_pgdata, PGconn *
403435
bool redundant = true;
404436
pgFile *file = (pgFile *) parray_get(dest_filelist, i);
405437

438+
//REVIEW Can we maybe optimize it and use some merge-like algorithm
439+
//instead of bsearch for each file? Of course it isn't an urgent fix.
406440
if (parray_bsearch(source_filelist, file, pgFileCompareRelPathWithExternal))
407441
redundant = false;
408442

@@ -411,6 +445,7 @@ do_catchup_instance(const char *source_pgdata, const char *dest_pgdata, PGconn *
411445
pg_strcasecmp(file->name, RELMAPPER_FILENAME) == 0)
412446
redundant = true;
413447

448+
//REVIEW This check seems unneded. Anyway we delete only redundant stuff below.
414449
/* do not delete the useful internal directories */
415450
if (S_ISDIR(file->mode) && !redundant)
416451
continue;
@@ -433,12 +468,16 @@ do_catchup_instance(const char *source_pgdata, const char *dest_pgdata, PGconn *
433468
}
434469
}
435470

471+
//REVIEW Hmm. Why do we need this at all?
472+
//I'd expect that we init pgfile with unset lock...
473+
//Not related to this patch, though.
436474
/* clear file locks */
437475
pfilearray_clear_locks(source_filelist);
438476

439477
/* Sort by size for load balancing */
440478
parray_qsort(source_filelist, pgFileCompareSize);
441479

480+
//REVIEW. This comment looks a bit misleading, since all theads share same filelist.
442481
/* init thread args with own file lists */
443482
threads = (pthread_t *) palloc(sizeof(pthread_t) * num_threads);
444483
threads_args = (catchup_thread_runner_arg *) palloc(sizeof(catchup_thread_runner_arg) * num_threads);
@@ -499,9 +538,12 @@ do_catchup_instance(const char *source_pgdata, const char *dest_pgdata, PGconn *
499538
elog(ERROR, "Data files transferring failed, time elapsed: %s",
500539
pretty_time);
501540

541+
//REVIEW The comment looks unrelated to the function. Do I miss something?
502542
/* Notify end of backup */
503543
pg_silent_client_messages(source_conn);
504544

545+
//REVIEW. Do we want to support pg 9.5? I suppose we never test it...
546+
//Maybe check it and error out early?
505547
/* Create restore point
506548
* Only if backup is from master.
507549
* For PG 9.5 create restore point only if pguser is superuser.
@@ -545,14 +587,17 @@ do_catchup_instance(const char *source_pgdata, const char *dest_pgdata, PGconn *
545587
stop_backup_result.tablespace_map_content_len = 0;
546588
}
547589

590+
//REVIEW We don't pass a filelist. Please adjust the comment.
548591
/* This function will also add list of xlog files
549592
* to the passed filelist */
550593
if(wait_WAL_streaming_end(NULL))
551594
elog(ERROR, "WAL streaming failed");
552595

596+
//REVIEW Please add a comment about these lsns. It is a crutial part of the algorithm.
553597
current.recovery_xid = stop_backup_result.snapshot_xid;
554598

555599
elog(LOG, "Getting the Recovery Time from WAL");
600+
556601
/* iterate over WAL from stop_backup lsn to start_backup lsn */
557602
if (!read_recovery_info(dest_xlog_path, current.tli,
558603
instance_config.xlog_seg_size,
@@ -566,6 +611,7 @@ do_catchup_instance(const char *source_pgdata, const char *dest_pgdata, PGconn *
566611
/* Cleanup */
567612
pg_free(query_text);
568613

614+
//REVIEW Please adjust the comment.
569615
/* In case of backup from replica >= 9.6 we must fix minRecPoint,
570616
* First we must find pg_control in source_filelist.
571617
*/
@@ -601,6 +647,7 @@ do_catchup_instance(const char *source_pgdata, const char *dest_pgdata, PGconn *
601647
/* construct fullpath */
602648
if (file->external_dir_num == 0)
603649
join_path_components(to_fullpath, dest_pgdata, file->rel_path);
650+
//REVIEW Let's clean this.
604651
/* TODO разобраться с external */
605652
/*else
606653
{
@@ -638,6 +685,7 @@ do_catchup_instance(const char *source_pgdata, const char *dest_pgdata, PGconn *
638685
parray_walk(source_filelist, pgFileFree);
639686
parray_free(source_filelist);
640687
pgFileFree(source_pg_control_file);
688+
//REVIEW Huh?
641689
// где закрывается conn?
642690
}
643691

@@ -682,6 +730,7 @@ catchup_thread_runner(void *arg)
682730
join_path_components(from_fullpath, arguments->from_root, file->rel_path);
683731
join_path_components(to_fullpath, arguments->to_root, file->rel_path);
684732
}
733+
//REVIEW Let's clean this.
685734
/*else
686735
{
687736
char external_dst[MAXPGPATH];

src/dir.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -909,6 +909,7 @@ get_tablespace_mapping(const char *dir)
909909
return dir;
910910
}
911911

912+
//REVIEW What exactly wrong with this abstraction? I don't get it...
912913
/*
913914
* TODO протёкшая абстрация, надо на этапе ревью решить что с ней делать,
914915
* потому как непонятно, почему мы в backup.c напрямую работаем с созданием

0 commit comments

Comments
 (0)