Skip to content

Resolve issues #5 and #1: reduce number of collisions in the ptrack map #6

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
May 16, 2021
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
.deps
*.so
*.o
ptrack--2.0.sql
Dockerfile

18 changes: 3 additions & 15 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,11 @@

MODULE_big = ptrack
OBJS = ptrack.o datapagemap.o engine.o $(WIN32RES)
EXTENSION = ptrack
EXTVERSION = 2.1
DATA = ptrack.sql ptrack--2.0--2.1.sql
DATA_built = $(EXTENSION)--$(EXTVERSION).sql
PGFILEDESC = "ptrack - block-level incremental backup engine"

EXTRA_CLEAN = $(EXTENSION)--$(EXTVERSION).sql
EXTENSION = ptrack
EXTVERSION = 2.2
DATA = ptrack--2.1.sql ptrack--2.0--2.1.sql ptrack--2.1--2.2.sql

TAP_TESTS = 1

Expand All @@ -22,13 +20,3 @@ top_builddir = ../..
include $(top_builddir)/src/Makefile.global
include $(top_srcdir)/contrib/contrib-global.mk
endif

$(EXTENSION)--$(EXTVERSION).sql: ptrack.sql
cat $^ > $@

# temp-install: EXTRA_INSTALL=contrib/ptrack

# check-tap: temp-install
# $(prove_check)

# check: check-tap
8 changes: 7 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,9 @@ To disable `ptrack` and clean up all remaining service files set `ptrack.map_siz

* ptrack_version() — returns ptrack version string.
* ptrack_init_lsn() — returns LSN of the last ptrack map initialization.
* ptrack_get_pagemapset('LSN') — returns a set of changed data files with bitmaps of changed blocks since specified LSN.
* ptrack_get_pagemapset(start_lsn pg_lsn) — returns a set of changed data files with bitmaps of changed blocks since specified `start_lsn`.
* ptrack_get_change_stat(start_lsn pg_lsn) — returns statistic of changes (number of files, pages and size in MB) since specified `start_lsn`.
* ptrack_get_change_file_stat(start_lsn pg_lsn) — returns per file statistic of changes (number of pages and size in MB) since specified `start_lsn`.

Usage example:

Expand Down Expand Up @@ -102,6 +104,10 @@ Usually, you have to only install new version of `ptrack` and do `ALTER EXTENSIO
* Do `ALTER EXTENSION 'ptrack' UPDATE;`.
* Restart your server.

#### Upgrading from 2.1.* to 2.2.*:

Since version 2.2 we use a different algorithm for tracking changed pages. Thus, data recorded in the `ptrack.map` using pre 2.2 versions of `ptrack` is incompatible with newer versions. After extension upgrade and server restart old `ptrack.map` will be discarded with `WARNING` and initialized from the scratch.

## Limitations

1. You can only use `ptrack` safely with `wal_level >= 'replica'`. Otherwise, you can lose tracking of some changes if crash-recovery occurs, since [certain commands are designed not to write WAL at all if wal_level is minimal](https://www.postgresql.org/docs/12/populate.html#POPULATE-PITR), but we only durably flush `ptrack` map at checkpoint time.
Expand Down
95 changes: 57 additions & 38 deletions engine.c
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,8 @@ ptrackMapInit(void)
sprintf(ptrack_path, "%s/%s", DataDir, PTRACK_PATH);
sprintf(ptrack_mmap_path, "%s/%s", DataDir, PTRACK_MMAP_PATH);

ptrack_map_reinit:

/* Remove old PTRACK_MMAP_PATH file, if exists */
if (ptrack_file_exists(ptrack_mmap_path))
durable_unlink(ptrack_mmap_path, LOG);
Expand All @@ -175,18 +177,15 @@ ptrackMapInit(void)
if (stat(ptrack_path, &stat_buf) == 0)
{
copy_file(ptrack_path, ptrack_mmap_path);
is_new_map = false; /* flag to check checksum */
is_new_map = false; /* flag to check map file format and checksum */
ptrack_fd = BasicOpenFile(ptrack_mmap_path, O_RDWR | PG_BINARY);
if (ptrack_fd < 0)
elog(ERROR, "ptrack init: failed to open map file \"%s\": %m", ptrack_mmap_path);
}
else
{
/* Create new file for PTRACK_MMAP_PATH */
ptrack_fd = BasicOpenFile(ptrack_mmap_path, O_RDWR | O_CREAT | PG_BINARY);
if (ptrack_fd < 0)
elog(ERROR, "ptrack init: failed to open map file \"%s\": %m", ptrack_mmap_path);
}

if (ptrack_fd < 0)
elog(ERROR, "ptrack init: failed to open map file \"%s\": %m", ptrack_mmap_path);

#ifdef WIN32
{
Expand Down Expand Up @@ -227,7 +226,19 @@ ptrackMapInit(void)
elog(ERROR, "ptrack init: wrong map format of file \"%s\"", ptrack_path);

/* Check ptrack version inside old ptrack map */
/* No-op for now, but may be used for future compatibility checks */
if (ptrack_map->version_num != PTRACK_VERSION_NUM)
{
ereport(WARNING,
(errcode(ERRCODE_DATA_CORRUPTED),
errmsg("ptrack init: map format version %d in the file \"%s\" is incompatible with loaded version %d",
ptrack_map->version_num, ptrack_path, PTRACK_VERSION_NUM),
errdetail("Deleting file \"%s\" and reinitializing ptrack map.", ptrack_path)));

/* Delete and try again */
durable_unlink(ptrack_path, LOG);
is_new_map = true;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Не могу найти, где делается unmap в этом случае?
При этом сразу после метки ptrack_map_reinit делается durable_unlink(ptrack_mmap_path).
В итоге, этот файл повисает невидимкой в файловой системе, и в адрессном пространстве процесса повисает его mmap.

Наверное есть смысл позвать здесь ptrackCleanFilesAndMap ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Да, похоже на то. Я сомневался в этом месте, но потом забыл и не разобрался до конца

goto ptrack_map_reinit;
}

/* Check CRC */
INIT_CRC32C(crc);
Expand Down Expand Up @@ -641,48 +652,56 @@ void
ptrack_mark_block(RelFileNodeBackend smgr_rnode,
ForkNumber forknum, BlockNumber blocknum)
{
PtBlockId bid;
size_t hash;
size_t slot1;
size_t slot2;
XLogRecPtr new_lsn;
PtBlockId bid;
/*
* We use pg_atomic_uint64 here only for alignment purposes, because
* pg_atomic_uint64 is forcely aligned on 8 bytes during the MSVC build.
* pg_atomic_uint64 is forcedly aligned on 8 bytes during the MSVC build.
*/
pg_atomic_uint64 old_lsn;
pg_atomic_uint64 old_init_lsn;

if (ptrack_map_size != 0 && (ptrack_map != NULL) &&
smgr_rnode.backend == InvalidBackendId) /* do not track temporary
* relations */
{
bid.relnode = smgr_rnode.node;
bid.forknum = forknum;
bid.blocknum = blocknum;
hash = BID_HASH_FUNC(bid);

if (RecoveryInProgress())
new_lsn = GetXLogReplayRecPtr(NULL);
else
new_lsn = GetXLogInsertRecPtr();
if (ptrack_map_size == 0
|| ptrack_map == NULL
|| smgr_rnode.backend != InvalidBackendId) /* do not track temporary
* relations */
return;

old_lsn.value = pg_atomic_read_u64(&ptrack_map->entries[hash]);
bid.relnode = smgr_rnode.node;
bid.forknum = forknum;
bid.blocknum = blocknum;

/* Atomically assign new init LSN value */
old_init_lsn.value = pg_atomic_read_u64(&ptrack_map->init_lsn);
hash = BID_HASH_FUNC(bid);
slot1 = hash % PtrackContentNblocks;
slot2 = ((hash << 32) | (hash >> 32)) % PtrackContentNblocks;

if (old_init_lsn.value == InvalidXLogRecPtr)
{
elog(DEBUG1, "ptrack_mark_block: init_lsn " UINT64_FORMAT " <- " UINT64_FORMAT, old_init_lsn.value, new_lsn);

while (old_init_lsn.value < new_lsn &&
!pg_atomic_compare_exchange_u64(&ptrack_map->init_lsn, (uint64 *) &old_init_lsn.value, new_lsn));
}
if (RecoveryInProgress())
new_lsn = GetXLogReplayRecPtr(NULL);
else
new_lsn = GetXLogInsertRecPtr();

elog(DEBUG3, "ptrack_mark_block: map[%zu]=" UINT64_FORMAT " <- " UINT64_FORMAT, hash, old_lsn.value, new_lsn);
/* Atomically assign new init LSN value */
old_init_lsn.value = pg_atomic_read_u64(&ptrack_map->init_lsn);
if (old_init_lsn.value == InvalidXLogRecPtr)
{
elog(DEBUG1, "ptrack_mark_block: init_lsn " UINT64_FORMAT " <- " UINT64_FORMAT, old_init_lsn.value, new_lsn);

/* Atomically assign new LSN value */
while (old_lsn.value < new_lsn &&
!pg_atomic_compare_exchange_u64(&ptrack_map->entries[hash], (uint64 *) &old_lsn.value, new_lsn));
elog(DEBUG3, "ptrack_mark_block: map[%zu]=" UINT64_FORMAT, hash, pg_atomic_read_u64(&ptrack_map->entries[hash]));
while (old_init_lsn.value < new_lsn &&
!pg_atomic_compare_exchange_u64(&ptrack_map->init_lsn, (uint64 *) &old_init_lsn.value, new_lsn));
}

/* Atomically assign new LSN value to the first slot */
old_lsn.value = pg_atomic_read_u64(&ptrack_map->entries[slot1]);
elog(DEBUG3, "ptrack_mark_block: map[%zu]=" UINT64_FORMAT " <- " UINT64_FORMAT, slot1, old_lsn.value, new_lsn);
while (old_lsn.value < new_lsn &&
!pg_atomic_compare_exchange_u64(&ptrack_map->entries[slot1], (uint64 *) &old_lsn.value, new_lsn));
elog(DEBUG3, "ptrack_mark_block: map[%zu]=" UINT64_FORMAT, hash, pg_atomic_read_u64(&ptrack_map->entries[slot1]));

/* And to the second */
old_lsn.value = pg_atomic_read_u64(&ptrack_map->entries[slot2]);
while (old_lsn.value < new_lsn &&
!pg_atomic_compare_exchange_u64(&ptrack_map->entries[slot2], (uint64 *) &old_lsn.value, new_lsn));
}
8 changes: 4 additions & 4 deletions engine.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ typedef struct PtrackMapHdr
{
/*
* Three magic bytes (+ \0) to be sure, that we are reading ptrack.map
* with a right PtrackMapHdr strucutre.
* with a right PtrackMapHdr structure.
*/
char magic[PTRACK_MAGIC_SIZE];

Expand All @@ -72,7 +72,6 @@ typedef struct PtrackMapHdr

typedef PtrackMapHdr * PtrackMap;

/* TODO: check MAXALIGN usage below */
/* Number of elements in ptrack map (LSN array) */
#define PtrackContentNblocks \
((ptrack_map_size - offsetof(PtrackMapHdr, entries) - sizeof(pg_crc32c)) / sizeof(pg_atomic_uint64))
Expand All @@ -84,9 +83,10 @@ typedef PtrackMapHdr * PtrackMap;
/* CRC32 value offset in order to directly access it in the mmap'ed memory chunk */
#define PtrackCrcOffset (PtrackActualSize - sizeof(pg_crc32c))

/* Map block address 'bid' to map slot */
/* Block address 'bid' to hash. To get slot position in map should be divided
* with '% PtrackContentNblocks' */
#define BID_HASH_FUNC(bid) \
(size_t)(DatumGetUInt64(hash_any_extended((unsigned char *)&bid, sizeof(bid), 0)) % PtrackContentNblocks)
(size_t)(DatumGetUInt64(hash_any_extended((unsigned char *)&bid, sizeof(bid), 0)))

/*
* Per process pointer to shared ptrack_map
Expand Down
54 changes: 54 additions & 0 deletions ptrack--2.1--2.2.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
/* ptrack/ptrack--2.1--2.2.sql */

-- Complain if script is sourced in psql, rather than via ALTER EXTENSION
\echo Use "ALTER EXTENSION ptrack UPDATE;" to load this file.\ quit

CREATE FUNCTION ptrack_get_change_stat(start_lsn pg_lsn)
RETURNS TABLE (
files bigint,
pages bigint,
"size, MB" numeric
) AS
$func$
DECLARE
block_size bigint;
BEGIN
block_size := (SELECT setting FROM pg_settings WHERE name = 'block_size');

RETURN QUERY
SELECT changed_files,
changed_pages,
block_size*changed_pages/(1024.0*1024)
FROM
(SELECT count(path) AS changed_files,
sum(
length(replace(right((pagemap)::text, -1)::varbit::text, '0', ''))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Если таблицы 8TB, то вот эта строчка потребует выделение 1GB памяти для преобразования ::varbit::text.
Соответственно, таблица 16TB потребует уже 2GB памяти, и постгресс просто сам не позволит этого сделать.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Это очень грустно, что varbit не имеет функции countbits.

Copy link
Contributor

@funny-falcon funny-falcon May 13, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

В любом случае, для ptrack_get_change_stat и ptrack_get_change_file_stat кажется нужно создать ptrack_get_pagecount (ну или другое название).
Или даже просто реализовать ptrack_get_change_file_stat полностью в сишке.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Таблицы же разбиты на сегменты по 1 ГБ дефолтно, а ptrack_get_pagemapset() выдаёт изначально битмапы per file/segment, то есть потребуется максимум в 1000 раз меньше памяти на каждое преобразование. Разве нет?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

А ок. Я ещё не посмотрел ptrack_get_pagemapset() .

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Слушай, но я бы всё равно поменял бы ptrack_get_pagemapset, добавив поле count в вывод.
pg_probackup при этом не поломается, т.к. он указывает поля, которые хочет.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Сделал

) AS changed_pages
FROM ptrack_get_pagemapset(start_lsn)) s;
END
$func$ LANGUAGE plpgsql;

CREATE FUNCTION ptrack_get_change_file_stat(start_lsn pg_lsn)
RETURNS TABLE (
file_path text,
pages int,
"size, MB" numeric
) AS
$func$
DECLARE
block_size bigint;
BEGIN
block_size := (SELECT setting FROM pg_settings WHERE name = 'block_size');

RETURN QUERY
SELECT s.path,
changed_pages,
block_size*changed_pages/(1024.0*1024)
FROM
(SELECT path,
length(replace(right((pagemap)::text, -1)::varbit::text, '0', ''))
AS changed_pages
FROM ptrack_get_pagemapset(start_lsn)) s
ORDER BY (changed_pages, s.path) DESC;
END
$func$ LANGUAGE plpgsql;
2 changes: 2 additions & 0 deletions ptrack.sql → ptrack--2.1.sql
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
/* ptrack/ptrack--2.1.sql */

-- Complain if script is sourced in psql, rather than via CREATE EXTENSION
\echo Use "CREATE EXTENSION ptrack" to load this file. \quit

Expand Down
38 changes: 29 additions & 9 deletions ptrack.c
Original file line number Diff line number Diff line change
Expand Up @@ -420,10 +420,9 @@ PG_FUNCTION_INFO_V1(ptrack_get_pagemapset);
Datum
ptrack_get_pagemapset(PG_FUNCTION_ARGS)
{
PtScanCtx *ctx;
FuncCallContext *funcctx;
PtScanCtx *ctx;
MemoryContext oldcontext;
XLogRecPtr update_lsn;
datapagemap_t pagemap;
char gather_path[MAXPGPATH];

Expand Down Expand Up @@ -486,6 +485,12 @@ ptrack_get_pagemapset(PG_FUNCTION_ARGS)

while (true)
{
size_t hash;
size_t slot1;
size_t slot2;
XLogRecPtr update_lsn1;
XLogRecPtr update_lsn2;

/* Stop traversal if there are no more segments */
if (ctx->bid.blocknum > ctx->relsize)
{
Expand Down Expand Up @@ -525,16 +530,31 @@ ptrack_get_pagemapset(PG_FUNCTION_ARGS)
}
}

update_lsn = pg_atomic_read_u64(&ptrack_map->entries[BID_HASH_FUNC(ctx->bid)]);
hash = BID_HASH_FUNC(ctx->bid);
slot1 = hash % PtrackContentNblocks;

update_lsn1 = pg_atomic_read_u64(&ptrack_map->entries[slot1]);

if (update_lsn != InvalidXLogRecPtr)
elog(DEBUG3, "ptrack: update_lsn %X/%X of blckno %u of file %s",
(uint32) (update_lsn >> 32), (uint32) update_lsn,
if (update_lsn1 != InvalidXLogRecPtr)
elog(DEBUG3, "ptrack: update_lsn1 %X/%X of blckno %u of file %s",
(uint32) (update_lsn1 >> 32), (uint32) update_lsn1,
ctx->bid.blocknum, ctx->relpath);

/* Block has been changed since specified LSN. Mark it in the bitmap */
if (update_lsn >= ctx->lsn)
datapagemap_add(&pagemap, ctx->bid.blocknum % ((BlockNumber) RELSEG_SIZE));
/* Only probe the second slot if the first one is marked */
if (update_lsn1 >= ctx->lsn)
{
slot2 = ((hash << 32) | (hash >> 32)) % PtrackContentNblocks;
update_lsn2 = pg_atomic_read_u64(&ptrack_map->entries[slot2]);

if (update_lsn2 != InvalidXLogRecPtr)
elog(DEBUG3, "ptrack: update_lsn2 %X/%X of blckno %u of file %s",
(uint32) (update_lsn1 >> 32), (uint32) update_lsn2,
ctx->bid.blocknum, ctx->relpath);

/* Block has been changed since specified LSN. Mark it in the bitmap */
if (update_lsn2 >= ctx->lsn)
datapagemap_add(&pagemap, ctx->bid.blocknum % ((BlockNumber) RELSEG_SIZE));
}

ctx->bid.blocknum += 1;
}
Expand Down
2 changes: 1 addition & 1 deletion ptrack.control
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# ptrack extension
comment = 'block-level incremental backup engine'
default_version = '2.1'
default_version = '2.2'
module_pathname = '$libdir/ptrack'
relocatable = true
4 changes: 2 additions & 2 deletions ptrack.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@
#include "utils/relcache.h"

/* Ptrack version as a string */
#define PTRACK_VERSION "2.1"
#define PTRACK_VERSION "2.2"
/* Ptrack version as a number */
#define PTRACK_VERSION_NUM 210
#define PTRACK_VERSION_NUM 220

/*
* Structure identifying block on the disk.
Expand Down
9 changes: 8 additions & 1 deletion t/001_basic.pl
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
use TestLib;
use Test::More;

plan tests => 23;
plan tests => 25;

my $node;
my $res;
Expand Down Expand Up @@ -115,6 +115,13 @@
qr/$rel_oid/,
'ptrack pagemapset should contain new relation oid');

# Check change stats
$res_stdout = $node->safe_psql("postgres", "SELECT pages FROM ptrack_get_change_stat('$flush_lsn')");
is($res_stdout > 0, 1, 'should be able to get aggregated stats of changes');

$res_stdout = $node->safe_psql("postgres", "SELECT count(*) FROM ptrack_get_change_file_stat('$flush_lsn')");
is($res_stdout > 0, 1, 'should be able to get per file stats of changes');

# We should be able to change ptrack map size (but loose all changes)
$node->append_conf(
'postgresql.conf', q{
Expand Down