Skip to content

Commit 87252a8

Browse files
committed
[Issue #66] add sanity checks for incremental restore
1 parent 79c8ea3 commit 87252a8

File tree

8 files changed

+246
-32
lines changed

8 files changed

+246
-32
lines changed

src/catalog.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1902,7 +1902,7 @@ write_backup_filelist(pgBackup *backup, parray *files, const char *root,
19021902
if (file->linked)
19031903
len += sprintf(line+len, ",\"linked\":\"%s\"", file->linked);
19041904

1905-
if (file->n_blocks != BLOCKNUM_INVALID)
1905+
if (file->n_blocks > 0)
19061906
len += sprintf(line+len, ",\"n_blocks\":\"%i\"", file->n_blocks);
19071907

19081908
sprintf(line+len, "}\n");

src/dir.c

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1021,7 +1021,7 @@ opt_externaldir_map(ConfigOption *opt, const char *arg)
10211021
*/
10221022
void
10231023
create_data_directories(parray *dest_files, const char *data_dir, const char *backup_dir,
1024-
bool extract_tablespaces, fio_location location)
1024+
bool extract_tablespaces, bool incremental, fio_location location)
10251025
{
10261026
int i;
10271027
parray *links = NULL;
@@ -1126,7 +1126,7 @@ create_data_directories(parray *dest_files, const char *data_dir, const char *ba
11261126
fio_mkdir(linked_path, pg_tablespace_mode, location);
11271127

11281128
/* create link to linked_path */
1129-
if (fio_symlink(linked_path, to_path, location) < 0)
1129+
if (fio_symlink(linked_path, to_path, incremental, location) < 0)
11301130
elog(ERROR, "Could not create symbolic link \"%s\": %s",
11311131
to_path, strerror(errno));
11321132

@@ -1203,13 +1203,18 @@ read_tablespace_map(parray *files, const char *backup_dir)
12031203

12041204
/*
12051205
* Check that all tablespace mapping entries have correct linked directory
1206-
* paths. Linked directories must be empty or do not exist.
1206+
* paths. Linked directories must be empty or must not exist, unless
1207+
* we are running incremental restore, in which case linked directories can be nonempty.
12071208
*
12081209
* If tablespace-mapping option is supplied, all OLDDIR entries must have
12091210
* entries in tablespace_map file.
1211+
*
1212+
* When running incremental restore with tablespace remapping, the
1213+
* new tablespace directory MUST be empty, because there is no way
1214+
* we can be sure that files lying there belong to our instance.
12101215
*/
12111216
void
1212-
check_tablespace_mapping(pgBackup *backup)
1217+
check_tablespace_mapping(pgBackup *backup, bool incremental, bool *tblspaces_are_empty)
12131218
{
12141219
// char this_backup_path[MAXPGPATH];
12151220
parray *links;
@@ -1236,6 +1241,18 @@ check_tablespace_mapping(pgBackup *backup)
12361241
elog(ERROR, "--tablespace-mapping option's old directory "
12371242
"doesn't have an entry in tablespace_map file: \"%s\"",
12381243
cell->old_dir);
1244+
1245+
/* For incremental restore, check that new directory is empty */
1246+
if (incremental)
1247+
{
1248+
if (!is_absolute_path(cell->new_dir))
1249+
elog(ERROR, "tablespace directory is not an absolute path: %s\n",
1250+
cell->new_dir);
1251+
1252+
if (!dir_is_empty(cell->new_dir, FIO_DB_HOST))
1253+
elog(ERROR, "restore tablespace destination is not empty: \"%s\"",
1254+
cell->new_dir);
1255+
}
12391256
}
12401257

12411258
/* 2 - all linked directories must be empty */
@@ -1257,8 +1274,12 @@ check_tablespace_mapping(pgBackup *backup)
12571274
linked_path);
12581275

12591276
if (!dir_is_empty(linked_path, FIO_DB_HOST))
1260-
elog(ERROR, "restore tablespace destination is not empty: \"%s\"",
1261-
linked_path);
1277+
{
1278+
if (!incremental)
1279+
elog(ERROR, "restore tablespace destination is not empty: \"%s\"",
1280+
linked_path);
1281+
*tblspaces_are_empty = false;
1282+
}
12621283
}
12631284

12641285
free(tmp_file);

src/merge.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -589,7 +589,7 @@ merge_chain(parray *parent_chain, pgBackup *full_backup, pgBackup *dest_backup)
589589

590590
/* Create directories */
591591
create_data_directories(dest_backup->files, full_database_dir,
592-
dest_backup->root_dir, false, FIO_BACKUP_HOST);
592+
dest_backup->root_dir, false, false, FIO_BACKUP_HOST);
593593

594594
/* External directories stuff */
595595
if (dest_backup->external_dir_str)

src/pg_probackup.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -867,12 +867,13 @@ extern void create_data_directories(parray *dest_files,
867867
const char *data_dir,
868868
const char *backup_dir,
869869
bool extract_tablespaces,
870+
bool incremental,
870871
fio_location location);
871872

872873
extern void read_tablespace_map(parray *files, const char *backup_dir);
873874
extern void opt_tablespace_map(ConfigOption *opt, const char *arg);
874875
extern void opt_externaldir_map(ConfigOption *opt, const char *arg);
875-
extern void check_tablespace_mapping(pgBackup *backup);
876+
extern void check_tablespace_mapping(pgBackup *backup, bool incremental, bool *tblspaces_are_empty);
876877
extern void check_external_dir_mapping(pgBackup *backup);
877878
extern char *get_external_remap(char *current_dir);
878879

@@ -951,6 +952,7 @@ extern bool create_empty_file(fio_location from_location, const char *to_root,
951952

952953
extern uint16 *get_checksum_map(const char *fullpath, uint32 checksum_version,
953954
int n_blocks, XLogRecPtr dest_stop_lsn, BlockNumber segmentno);
955+
extern pid_t check_postmaster(const char *pgdata);
954956

955957
extern bool check_file_pages(pgFile *file, const char *fullpath, XLogRecPtr stop_lsn,
956958
uint32 checksum_version, uint32 backup_version);
@@ -1048,6 +1050,7 @@ extern bool pgut_rmtree(const char *path, bool rmtopdir, bool strict);
10481050

10491051
extern uint16 *fio_get_checksum_map(const char *fullpath, uint32 checksum_version,
10501052
int n_blocks, XLogRecPtr dest_stop_lsn, BlockNumber segmentno);
1053+
extern pid_t fio_check_postmaster(const char *pgdata);
10511054

10521055
extern int32 fio_decompress(void* dst, void const* src, size_t size, int compress_alg);
10531056

src/restore.c

Lines changed: 106 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@ static void pg12_recovery_config(pgBackup *backup, bool add_include);
4949
static void restore_chain(pgBackup *dest_backup, parray *parent_chain,
5050
parray *dbOid_exclude_list, pgRestoreParams *params,
5151
const char *pgdata_path, bool no_sync);
52+
static void check_incremental_compatibility(const char *pgdata, uint64 system_identifier,
53+
bool lsn_based);
5254

5355
/*
5456
* Iterate over backup list to find all ancestors of the broken parent_backup
@@ -111,6 +113,8 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt,
111113
char *action = params->is_restore ? "Restore":"Validate";
112114
parray *parent_chain = NULL;
113115
parray *dbOid_exclude_list = NULL;
116+
bool pgdata_is_empty = true;
117+
bool tblspaces_are_empty = true;
114118

115119
if (params->is_restore)
116120
{
@@ -126,16 +130,17 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt,
126130
{
127131
elog(INFO, "Running incremental restore into nonempty directory: \"%s\"",
128132
instance_config.pgdata);
133+
134+
check_incremental_compatibility(instance_config.pgdata,
135+
instance_config.system_identifier,
136+
false);
129137
}
130138
else
131139
elog(ERROR, "Restore destination is not empty: \"%s\"",
132140
instance_config.pgdata);
133-
}
134-
else
135-
{
136-
/* if remote directory is empty then disable incremental restore */
137-
if (params->incremental)
138-
params->incremental = false;
141+
142+
/* if remote directory is empty then incremental restore may be disabled */
143+
pgdata_is_empty = true;
139144
}
140145
}
141146

@@ -340,7 +345,10 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt,
340345
*/
341346
if (params->is_restore)
342347
{
343-
check_tablespace_mapping(dest_backup);
348+
check_tablespace_mapping(dest_backup, params->incremental, &tblspaces_are_empty);
349+
350+
if (pgdata_is_empty && tblspaces_are_empty)
351+
params->incremental = false;
344352

345353
/* no point in checking external directories if their restore is not requested */
346354
if (!params->skip_external_dirs)
@@ -602,7 +610,8 @@ restore_chain(pgBackup *dest_backup, parray *parent_chain,
602610
* Restore dest_backup internal directories.
603611
*/
604612
create_data_directories(dest_files, instance_config.pgdata,
605-
dest_backup->root_dir, true, FIO_DB_HOST);
613+
dest_backup->root_dir, true,
614+
params->incremental, FIO_DB_HOST);
606615

607616
/*
608617
* Restore dest_backup external directories.
@@ -660,16 +669,23 @@ restore_chain(pgBackup *dest_backup, parray *parent_chain,
660669
elog(INFO, "Extracting the content of destination directory for incremental restore");
661670

662671
/* TODO: external directories */
672+
time(&start_time);
663673
if (fio_is_remote(FIO_DB_HOST))
664674
fio_list_dir(pgdata_files, pgdata_path, false, true, false, false, true, 0);
665675
else
666676
dir_list_file(pgdata_files, pgdata_path,
667677
false, true, false, false, true, 0, FIO_LOCAL_HOST);
668-
669678
parray_qsort(pgdata_files, pgFileCompareRelPathWithExternalDesc);
670-
elog(INFO, "Destination directory content extracted, time elapsed:");
671679

672-
elog(INFO, "Removing redundant files");
680+
time(&end_time);
681+
pretty_time_interval(difftime(end_time, start_time),
682+
pretty_time, lengthof(pretty_time));
683+
684+
elog(INFO, "Destination directory content extracted, time elapsed: %s",
685+
pretty_time);
686+
687+
elog(INFO, "Removing redundant files in destination directory");
688+
time(&start_time);
673689
for (i = 0; i < parray_num(pgdata_files); i++)
674690
{
675691
pgFile *file = (pgFile *) parray_get(pgdata_files, i);
@@ -686,7 +702,11 @@ restore_chain(pgBackup *dest_backup, parray *parent_chain,
686702
}
687703
}
688704

689-
elog(INFO, "Redundant files are removed, time elapsed:");
705+
time(&end_time);
706+
pretty_time_interval(difftime(end_time, start_time),
707+
pretty_time, lengthof(pretty_time));
708+
709+
elog(INFO, "Redundant files are removed, time elapsed: %s", pretty_time);
690710

691711
// use_bitmap = true;
692712
/* At this point PGDATA does not contain files that do not also exist in the backup filelist */
@@ -756,9 +776,9 @@ restore_chain(pgBackup *dest_backup, parray *parent_chain,
756776
elog(INFO, "Backup files are restored. Transfered bytes: %s, time elapsed: %s",
757777
pretty_total_bytes, pretty_time);
758778

759-
elog(INFO, "Approximate restore efficiency ratio: %.f%% (%s/%s)",
760-
((float) dest_bytes / total_bytes) * 100,
761-
pretty_dest_bytes, pretty_total_bytes);
779+
elog(INFO, "Restore overwriting ratio (less is better): %.f%% (%s/%s)",
780+
((float) total_bytes / dest_bytes) * 100,
781+
pretty_total_bytes, pretty_dest_bytes);
762782
}
763783
else
764784
elog(ERROR, "Backup files restoring failed. Transfered bytes: %s, time elapsed: %s",
@@ -921,7 +941,9 @@ restore_files(void *arg)
921941

922942
if (arguments->incremental &&
923943
parray_bsearch(arguments->pgdata_files, dest_file, pgFileCompareRelPathWithExternalDesc))
944+
{
924945
already_exists = true;
946+
}
925947

926948
/*
927949
* Handle incremental restore case for data files.
@@ -933,7 +955,6 @@ restore_files(void *arg)
933955
dest_file->is_datafile && !dest_file->is_cfs &&
934956
dest_file->n_blocks > 0)
935957
{
936-
elog(INFO, "HELLO");
937958
/* remote mode */
938959
if (fio_is_remote(FIO_DB_HOST))
939960
checksum_map = fio_get_checksum_map(to_fullpath, arguments->dest_backup->checksum_version,
@@ -1743,3 +1764,72 @@ get_dbOid_exclude_list(pgBackup *backup, parray *datname_list,
17431764

17441765
return dbOid_exclude_list;
17451766
}
1767+
1768+
/* check that instance has the same SYSTEM_ID, */
1769+
void
1770+
check_incremental_compatibility(const char *pgdata, uint64 system_identifier, bool lsn_based)
1771+
{
1772+
uint64 system_id_pgdata;
1773+
bool success = true;
1774+
pid_t pid;
1775+
char backup_label[MAXPGPATH];
1776+
1777+
/* slurp pg_control and check that system ID is the same */
1778+
/* check that instance is not running */
1779+
/* if lsn_based, check that there is no backup_label files is around AND
1780+
* get redo point lsn from destination pg_control.
1781+
1782+
* It is really important to be sure that pg_control is in cohesion with
1783+
* data files content, because based on pg_control information we will
1784+
* choose a backup suitable for lsn based incremental restore.
1785+
*/
1786+
/* TODO: handle timeline discrepancies */
1787+
1788+
system_id_pgdata = get_system_identifier(pgdata);
1789+
1790+
if (system_id_pgdata != instance_config.system_identifier)
1791+
{
1792+
elog(WARNING, "Backup catalog was initialized for system id %lu, "
1793+
"but destination directory system id is %lu",
1794+
system_identifier, system_id_pgdata);
1795+
success = false;
1796+
}
1797+
1798+
/* check postmaster pid */
1799+
if (fio_is_remote(FIO_DB_HOST))
1800+
pid = fio_check_postmaster(pgdata);
1801+
else
1802+
pid = check_postmaster(pgdata);
1803+
1804+
if (pid == 1) /* postmaster.pid is mangled */
1805+
{
1806+
char pid_file[MAXPGPATH];
1807+
1808+
snprintf(pid_file, MAXPGPATH, "%s/postmaster.pid", pgdata);
1809+
elog(WARNING, "Pid file \"%s\" is mangled, cannot determine whether postmaster is running or not",
1810+
pid_file);
1811+
success = false;
1812+
}
1813+
else if (pid > 1)
1814+
{
1815+
elog(WARNING, "Postmaster with pid %u is running in destination directory \"%s\"",
1816+
pid, pgdata);
1817+
success = false;
1818+
}
1819+
1820+
if (lsn_based)
1821+
{
1822+
snprintf(backup_label, MAXPGPATH, "%s/backup_label", pgdata);
1823+
if (fio_access(backup_label, F_OK, FIO_DB_HOST) == 0)
1824+
{
1825+
elog(WARNING, "Destination directory contains \"backup_control\" file. "
1826+
"It does not mean that you should delete this file, only that "
1827+
"lsn-based incremental restore is dangerous to use in this case. "
1828+
"Consider to use checksum-based incremental restore");
1829+
success = false;
1830+
}
1831+
}
1832+
1833+
if (!success)
1834+
elog(ERROR, "Incremental restore is impossible");
1835+
}

src/util.c

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -549,3 +549,50 @@ datapagemap_print_debug(datapagemap_t *map)
549549

550550
pg_free(iter);
551551
}
552+
553+
/*
554+
* return pid of postmaster process running in given pgdata.
555+
* return 0 if there is none.
556+
*/
557+
pid_t
558+
check_postmaster(const char *pgdata)
559+
{
560+
FILE *fp;
561+
pid_t pid;
562+
char pid_file[MAXPGPATH];
563+
564+
snprintf(pid_file, MAXPGPATH, "%s/postmaster.pid", pgdata);
565+
566+
fp = fopen(pid_file, "r");
567+
if (fp == NULL)
568+
{
569+
/* No pid file, acceptable*/
570+
if (errno == ENOENT)
571+
return 0;
572+
else
573+
elog(ERROR, "Cannot open file \"%s\": %s",
574+
pid_file, strerror(errno));
575+
}
576+
577+
if (fscanf(fp, "%i", &pid) != 1)
578+
{
579+
/* something is wrong with the file content */
580+
pid = 1;
581+
}
582+
583+
if (pid > 1)
584+
{
585+
if (kill(pid, 0) != 0)
586+
{
587+
/* process no longer exists */
588+
if (errno == ESRCH)
589+
pid = 0;
590+
else
591+
elog(ERROR, "Failed to send signal 0 to a process %d: %s",
592+
pid, strerror(errno));
593+
}
594+
}
595+
596+
fclose(fp);
597+
return pid;
598+
}

0 commit comments

Comments
 (0)