diff --git a/.gitignore b/.gitignore index b46b4ef..4990aa6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,4 @@ .deps *.so *.o -ptrack--2.0.sql Dockerfile - diff --git a/Makefile b/Makefile index 8544f90..ba9ce1d 100644 --- a/Makefile +++ b/Makefile @@ -2,13 +2,11 @@ MODULE_big = ptrack OBJS = ptrack.o datapagemap.o engine.o $(WIN32RES) -EXTENSION = ptrack -EXTVERSION = 2.1 -DATA = ptrack.sql ptrack--2.0--2.1.sql -DATA_built = $(EXTENSION)--$(EXTVERSION).sql PGFILEDESC = "ptrack - block-level incremental backup engine" -EXTRA_CLEAN = $(EXTENSION)--$(EXTVERSION).sql +EXTENSION = ptrack +EXTVERSION = 2.2 +DATA = ptrack--2.1.sql ptrack--2.0--2.1.sql ptrack--2.1--2.2.sql TAP_TESTS = 1 @@ -22,13 +20,3 @@ top_builddir = ../.. include $(top_builddir)/src/Makefile.global include $(top_srcdir)/contrib/contrib-global.mk endif - -$(EXTENSION)--$(EXTVERSION).sql: ptrack.sql - cat $^ > $@ - -# temp-install: EXTRA_INSTALL=contrib/ptrack - -# check-tap: temp-install -# $(prove_check) - -# check: check-tap diff --git a/README.md b/README.md index 57a7c5c..39ea00b 100644 --- a/README.md +++ b/README.md @@ -65,7 +65,8 @@ To disable `ptrack` and clean up all remaining service files set `ptrack.map_siz * ptrack_version() — returns ptrack version string. * ptrack_init_lsn() — returns LSN of the last ptrack map initialization. - * ptrack_get_pagemapset('LSN') — returns a set of changed data files with bitmaps of changed blocks since specified LSN. + * ptrack_get_pagemapset(start_lsn pg_lsn) — returns a set of changed data files with bitmaps of changed blocks since specified `start_lsn`. + * ptrack_get_change_stat(start_lsn pg_lsn) — returns statistics of changes (number of files, pages and size in MB) since specified `start_lsn`. Usage example: @@ -102,6 +103,10 @@ Usually, you have to only install new version of `ptrack` and do `ALTER EXTENSIO * Do `ALTER EXTENSION 'ptrack' UPDATE;`. * Restart your server. 
+#### Upgrading from 2.1.* to 2.2.*: + +Since version 2.2 we use a different algorithm for tracking changed pages. Thus, data recorded in the `ptrack.map` using pre-2.2 versions of `ptrack` is incompatible with newer versions. After the extension upgrade and server restart, the old `ptrack.map` will be discarded with a `WARNING` and initialized from scratch. + ## Limitations 1. You can only use `ptrack` safely with `wal_level >= 'replica'`. Otherwise, you can lose tracking of some changes if crash-recovery occurs, since [certain commands are designed not to write WAL at all if wal_level is minimal](https://www.postgresql.org/docs/12/populate.html#POPULATE-PITR), but we only durably flush `ptrack` map at checkpoint time. diff --git a/engine.c b/engine.c index 35cc14c..89217a9 100644 --- a/engine.c +++ b/engine.c @@ -156,6 +156,8 @@ ptrackMapInit(void) sprintf(ptrack_path, "%s/%s", DataDir, PTRACK_PATH); sprintf(ptrack_mmap_path, "%s/%s", DataDir, PTRACK_MMAP_PATH); +ptrack_map_reinit: + /* Remove old PTRACK_MMAP_PATH file, if exists */ if (ptrack_file_exists(ptrack_mmap_path)) durable_unlink(ptrack_mmap_path, LOG); @@ -175,18 +177,15 @@ ptrackMapInit(void) if (stat(ptrack_path, &stat_buf) == 0) { copy_file(ptrack_path, ptrack_mmap_path); - is_new_map = false; /* flag to check checksum */ + is_new_map = false; /* flag to check map file format and checksum */ ptrack_fd = BasicOpenFile(ptrack_mmap_path, O_RDWR | PG_BINARY); - if (ptrack_fd < 0) - elog(ERROR, "ptrack init: failed to open map file \"%s\": %m", ptrack_mmap_path); } else - { /* Create new file for PTRACK_MMAP_PATH */ ptrack_fd = BasicOpenFile(ptrack_mmap_path, O_RDWR | O_CREAT | PG_BINARY); - if (ptrack_fd < 0) - elog(ERROR, "ptrack init: failed to open map file \"%s\": %m", ptrack_mmap_path); - } + + if (ptrack_fd < 0) + elog(ERROR, "ptrack init: failed to open map file \"%s\": %m", ptrack_mmap_path); #ifdef WIN32 { @@ -227,7 +226,20 @@ ptrackMapInit(void) elog(ERROR, "ptrack init: wrong map format of file 
\"%s\"", ptrack_path); /* Check ptrack version inside old ptrack map */ - /* No-op for now, but may be used for future compatibility checks */ + if (ptrack_map->version_num != PTRACK_VERSION_NUM) + { + ereport(WARNING, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("ptrack init: map format version %d in the file \"%s\" is incompatible with loaded version %d", + ptrack_map->version_num, ptrack_path, PTRACK_VERSION_NUM), + errdetail("Deleting file \"%s\" and reinitializing ptrack map.", ptrack_path))); + + /* Clean up everything and try again */ + ptrackCleanFilesAndMap(); + + is_new_map = true; + goto ptrack_map_reinit; + } /* Check CRC */ INIT_CRC32C(crc); @@ -378,7 +390,7 @@ ptrackCheckpoint(void) /* * We are writing ptrack map values to file, but we want to simply map it * into the memory with mmap after a crash/restart. That way, we have to - * write values taking into account all paddings/allignments. + * write values taking into account all paddings/alignments. * * Write both magic and varsion_num at once. */ @@ -435,7 +447,7 @@ ptrackCheckpoint(void) * going to overflow. */ /* - * We should not have any allignment issues here, since sizeof() + * We should not have any alignment issues here, since sizeof() * takes into account all paddings for us. */ ptrack_write_chunk(ptrack_tmp_fd, &crc, (char *) buf, writesz); @@ -446,7 +458,7 @@ ptrackCheckpoint(void) } } - /* Write if anythig left */ + /* Write if anything left */ if ((i + 1) % PTRACK_BUF_SIZE != 0) { size_t writesz = sizeof(pg_atomic_uint64) * j; @@ -641,48 +653,56 @@ void ptrack_mark_block(RelFileNodeBackend smgr_rnode, ForkNumber forknum, BlockNumber blocknum) { + PtBlockId bid; size_t hash; + size_t slot1; + size_t slot2; XLogRecPtr new_lsn; - PtBlockId bid; /* * We use pg_atomic_uint64 here only for alignment purposes, because - * pg_atomic_uint64 is forcely aligned on 8 bytes during the MSVC build. + * pg_atomic_uint64 is forcedly aligned on 8 bytes during the MSVC build. 
*/ pg_atomic_uint64 old_lsn; pg_atomic_uint64 old_init_lsn; - if (ptrack_map_size != 0 && (ptrack_map != NULL) && - smgr_rnode.backend == InvalidBackendId) /* do not track temporary - * relations */ - { - bid.relnode = smgr_rnode.node; - bid.forknum = forknum; - bid.blocknum = blocknum; - hash = BID_HASH_FUNC(bid); - - if (RecoveryInProgress()) - new_lsn = GetXLogReplayRecPtr(NULL); - else - new_lsn = GetXLogInsertRecPtr(); - - old_lsn.value = pg_atomic_read_u64(&ptrack_map->entries[hash]); + if (ptrack_map_size == 0 + || ptrack_map == NULL + || smgr_rnode.backend != InvalidBackendId) /* do not track temporary + * relations */ + return; - /* Atomically assign new init LSN value */ - old_init_lsn.value = pg_atomic_read_u64(&ptrack_map->init_lsn); + bid.relnode = smgr_rnode.node; + bid.forknum = forknum; + bid.blocknum = blocknum; - if (old_init_lsn.value == InvalidXLogRecPtr) - { - elog(DEBUG1, "ptrack_mark_block: init_lsn " UINT64_FORMAT " <- " UINT64_FORMAT, old_init_lsn.value, new_lsn); + hash = BID_HASH_FUNC(bid); + slot1 = hash % PtrackContentNblocks; + slot2 = ((hash << 32) | (hash >> 32)) % PtrackContentNblocks; - while (old_init_lsn.value < new_lsn && - !pg_atomic_compare_exchange_u64(&ptrack_map->init_lsn, (uint64 *) &old_init_lsn.value, new_lsn)); - } + if (RecoveryInProgress()) + new_lsn = GetXLogReplayRecPtr(NULL); + else + new_lsn = GetXLogInsertRecPtr(); - elog(DEBUG3, "ptrack_mark_block: map[%zu]=" UINT64_FORMAT " <- " UINT64_FORMAT, hash, old_lsn.value, new_lsn); + /* Atomically assign new init LSN value */ + old_init_lsn.value = pg_atomic_read_u64(&ptrack_map->init_lsn); + if (old_init_lsn.value == InvalidXLogRecPtr) + { + elog(DEBUG1, "ptrack_mark_block: init_lsn " UINT64_FORMAT " <- " UINT64_FORMAT, old_init_lsn.value, new_lsn); - /* Atomically assign new LSN value */ - while (old_lsn.value < new_lsn && - !pg_atomic_compare_exchange_u64(&ptrack_map->entries[hash], (uint64 *) &old_lsn.value, new_lsn)); - elog(DEBUG3, "ptrack_mark_block: map[%zu]=" 
UINT64_FORMAT, hash, pg_atomic_read_u64(&ptrack_map->entries[hash])); + while (old_init_lsn.value < new_lsn && + !pg_atomic_compare_exchange_u64(&ptrack_map->init_lsn, (uint64 *) &old_init_lsn.value, new_lsn)); } + + /* Atomically assign new LSN value to the first slot */ + old_lsn.value = pg_atomic_read_u64(&ptrack_map->entries[slot1]); + elog(DEBUG3, "ptrack_mark_block: map[%zu]=" UINT64_FORMAT " <- " UINT64_FORMAT, slot1, old_lsn.value, new_lsn); + while (old_lsn.value < new_lsn && + !pg_atomic_compare_exchange_u64(&ptrack_map->entries[slot1], (uint64 *) &old_lsn.value, new_lsn)); + elog(DEBUG3, "ptrack_mark_block: map[%zu]=" UINT64_FORMAT, slot1, pg_atomic_read_u64(&ptrack_map->entries[slot1])); + + /* And to the second */ + old_lsn.value = pg_atomic_read_u64(&ptrack_map->entries[slot2]); + while (old_lsn.value < new_lsn && + !pg_atomic_compare_exchange_u64(&ptrack_map->entries[slot2], (uint64 *) &old_lsn.value, new_lsn)); } diff --git a/engine.h b/engine.h index 34cf15f..e46f803 100644 --- a/engine.h +++ b/engine.h @@ -50,7 +50,7 @@ typedef struct PtrackMapHdr { /* * Three magic bytes (+ \0) to be sure, that we are reading ptrack.map - * with a right PtrackMapHdr strucutre. + * with a right PtrackMapHdr structure. */ char magic[PTRACK_MAGIC_SIZE]; @@ -72,7 +72,6 @@ typedef struct PtrackMapHdr typedef PtrackMapHdr * PtrackMap; -/* TODO: check MAXALIGN usage below */ /* Number of elements in ptrack map (LSN array) */ #define PtrackContentNblocks \ ((ptrack_map_size - offsetof(PtrackMapHdr, entries) - sizeof(pg_crc32c)) / sizeof(pg_atomic_uint64)) @@ -84,9 +83,10 @@ typedef PtrackMapHdr * PtrackMap; /* CRC32 value offset in order to directly access it in the mmap'ed memory chunk */ #define PtrackCrcOffset (PtrackActualSize - sizeof(pg_crc32c)) -/* Map block address 'bid' to map slot */ +/* Block address 'bid' to hash. 
To get the slot position in the map, the hash must be reduced + * with '% PtrackContentNblocks' */ #define BID_HASH_FUNC(bid) \ - (size_t)(DatumGetUInt64(hash_any_extended((unsigned char *)&bid, sizeof(bid), 0)) % PtrackContentNblocks) + (size_t)(DatumGetUInt64(hash_any_extended((unsigned char *)&bid, sizeof(bid), 0))) /* * Per process pointer to shared ptrack_map diff --git a/ptrack--2.1--2.2.sql b/ptrack--2.1--2.2.sql new file mode 100644 index 0000000..b09c15e --- /dev/null +++ b/ptrack--2.1--2.2.sql @@ -0,0 +1,35 @@ +/* ptrack/ptrack--2.1--2.2.sql */ + +-- Complain if script is sourced in psql, rather than via ALTER EXTENSION +\echo Use "ALTER EXTENSION ptrack UPDATE;" to load this file. \quit + +DROP FUNCTION ptrack_get_pagemapset(start_lsn pg_lsn); +CREATE FUNCTION ptrack_get_pagemapset(start_lsn pg_lsn) +RETURNS TABLE (path text, + pagecount bigint, + pagemap bytea) +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT VOLATILE; + +CREATE FUNCTION ptrack_get_change_stat(start_lsn pg_lsn) + RETURNS TABLE ( + files bigint, + pages numeric, + "size, MB" numeric + ) AS +$func$ +DECLARE +block_size bigint; +BEGIN + block_size := (SELECT setting FROM pg_settings WHERE name = 'block_size'); + + RETURN QUERY + SELECT changed_files, + changed_pages, + block_size * changed_pages / (1024.0 * 1024) + FROM + (SELECT count(path) AS changed_files, + sum(pagecount) AS changed_pages + FROM ptrack_get_pagemapset(start_lsn)) s; +END +$func$ LANGUAGE plpgsql; diff --git a/ptrack.sql b/ptrack--2.1.sql similarity index 94% rename from ptrack.sql rename to ptrack--2.1.sql index 80ae927..c963964 100644 --- a/ptrack.sql +++ b/ptrack--2.1.sql @@ -1,3 +1,5 @@ +/* ptrack/ptrack--2.1.sql */ + -- Complain if script is sourced in psql, rather than via CREATE EXTENSION \echo Use "CREATE EXTENSION ptrack" to load this file. \quit diff --git a/ptrack.c b/ptrack.c index d897ecf..40630e7 100644 --- a/ptrack.c +++ b/ptrack.c @@ -137,7 +137,7 @@ _PG_fini(void) /* * Ptrack follow up for copydir() routine. 
It parses database OID - * and tablespace OID from path string. We do not need to recoursively + * and tablespace OID from path string. We do not need to recursively * walk subdirs here, copydir() will do it for us if needed. */ static void @@ -420,11 +420,11 @@ PG_FUNCTION_INFO_V1(ptrack_get_pagemapset); Datum ptrack_get_pagemapset(PG_FUNCTION_ARGS) { + PtScanCtx *ctx; FuncCallContext *funcctx; - PtScanCtx *ctx; MemoryContext oldcontext; - XLogRecPtr update_lsn; datapagemap_t pagemap; + int64 pagecount = 0; char gather_path[MAXPGPATH]; /* Exit immediately if there is no map */ @@ -445,12 +445,13 @@ ptrack_get_pagemapset(PG_FUNCTION_ARGS) /* Make tuple descriptor */ #if PG_VERSION_NUM >= 120000 - tupdesc = CreateTemplateTupleDesc(2); + tupdesc = CreateTemplateTupleDesc(3); #else - tupdesc = CreateTemplateTupleDesc(2, false); + tupdesc = CreateTemplateTupleDesc(3, false); #endif TupleDescInitEntry(tupdesc, (AttrNumber) 1, "path", TEXTOID, -1, 0); - TupleDescInitEntry(tupdesc, (AttrNumber) 2, "pagemap", BYTEAOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 2, "pagecount", INT8OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 3, "pagemap", BYTEAOID, -1, 0); funcctx->tuple_desc = BlessTupleDesc(tupdesc); funcctx->user_fctx = ctx; @@ -486,14 +487,20 @@ ptrack_get_pagemapset(PG_FUNCTION_ARGS) while (true) { + size_t hash; + size_t slot1; + size_t slot2; + XLogRecPtr update_lsn1; + XLogRecPtr update_lsn2; + /* Stop traversal if there are no more segments */ if (ctx->bid.blocknum > ctx->relsize) { /* We completed a segment and there is a bitmap to return */ if (pagemap.bitmap != NULL) { - Datum values[2]; - bool nulls[2] = {false}; + Datum values[3]; + bool nulls[3] = {false}; char pathname[MAXPGPATH]; bytea *result = NULL; Size result_sz = pagemap.bitmapsize + VARHDRSZ; @@ -507,11 +514,13 @@ ptrack_get_pagemapset(PG_FUNCTION_ARGS) strcpy(pathname, ctx->relpath); values[0] = CStringGetTextDatum(pathname); - values[1] = PointerGetDatum(result); + values[1] = 
Int64GetDatum(pagecount); + values[2] = PointerGetDatum(result); pfree(pagemap.bitmap); pagemap.bitmap = NULL; pagemap.bitmapsize = 0; + pagecount = 0; htup = heap_form_tuple(funcctx->tuple_desc, values, nulls); if (htup) @@ -525,16 +534,34 @@ ptrack_get_pagemapset(PG_FUNCTION_ARGS) } } - update_lsn = pg_atomic_read_u64(&ptrack_map->entries[BID_HASH_FUNC(ctx->bid)]); + hash = BID_HASH_FUNC(ctx->bid); + slot1 = hash % PtrackContentNblocks; + + update_lsn1 = pg_atomic_read_u64(&ptrack_map->entries[slot1]); - if (update_lsn != InvalidXLogRecPtr) - elog(DEBUG3, "ptrack: update_lsn %X/%X of blckno %u of file %s", - (uint32) (update_lsn >> 32), (uint32) update_lsn, + if (update_lsn1 != InvalidXLogRecPtr) + elog(DEBUG3, "ptrack: update_lsn1 %X/%X of blckno %u of file %s", + (uint32) (update_lsn1 >> 32), (uint32) update_lsn1, ctx->bid.blocknum, ctx->relpath); - /* Block has been changed since specified LSN. Mark it in the bitmap */ - if (update_lsn >= ctx->lsn) - datapagemap_add(&pagemap, ctx->bid.blocknum % ((BlockNumber) RELSEG_SIZE)); + /* Only probe the second slot if the first one is marked */ + if (update_lsn1 >= ctx->lsn) + { + slot2 = ((hash << 32) | (hash >> 32)) % PtrackContentNblocks; + update_lsn2 = pg_atomic_read_u64(&ptrack_map->entries[slot2]); + + if (update_lsn2 != InvalidXLogRecPtr) + elog(DEBUG3, "ptrack: update_lsn2 %X/%X of blckno %u of file %s", + (uint32) (update_lsn2 >> 32), (uint32) update_lsn2, + ctx->bid.blocknum, ctx->relpath); + + /* Block has been changed since specified LSN. 
Mark it in the bitmap */ + if (update_lsn2 >= ctx->lsn) + { + pagecount += 1; + datapagemap_add(&pagemap, ctx->bid.blocknum % ((BlockNumber) RELSEG_SIZE)); + } + } ctx->bid.blocknum += 1; } diff --git a/ptrack.control b/ptrack.control index d2d8792..ec0af9d 100644 --- a/ptrack.control +++ b/ptrack.control @@ -1,5 +1,5 @@ # ptrack extension comment = 'block-level incremental backup engine' -default_version = '2.1' +default_version = '2.2' module_pathname = '$libdir/ptrack' relocatable = true diff --git a/ptrack.h b/ptrack.h index 7e6b6e5..d205115 100644 --- a/ptrack.h +++ b/ptrack.h @@ -22,9 +22,9 @@ #include "utils/relcache.h" /* Ptrack version as a string */ -#define PTRACK_VERSION "2.1" +#define PTRACK_VERSION "2.2" /* Ptrack version as a number */ -#define PTRACK_VERSION_NUM 210 +#define PTRACK_VERSION_NUM 220 /* * Structure identifying block on the disk. diff --git a/t/001_basic.pl b/t/001_basic.pl index 1abc788..bac81f2 100644 --- a/t/001_basic.pl +++ b/t/001_basic.pl @@ -10,7 +10,7 @@ use TestLib; use Test::More; -plan tests => 23; +plan tests => 24; my $node; my $res; @@ -115,6 +115,10 @@ qr/$rel_oid/, 'ptrack pagemapset should contain new relation oid'); +# Check change stats +$res_stdout = $node->safe_psql("postgres", "SELECT pages FROM ptrack_get_change_stat('$flush_lsn')"); +is($res_stdout > 0, 1, 'should be able to get aggregated stats of changes'); + # We should be able to change ptrack map size (but loose all changes) $node->append_conf( 'postgresql.conf', q{