diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..f05773a --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,94 @@ +name: Test + +on: + push: + branches: + - "**" + pull_request: + branches: + - main + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +jobs: + Test: + strategy: + matrix: + # pg_version: [15] + pg_version: [11, 12, 13, 14, 15] + os: [ubuntu-22.04] + # tests: [tap] + tests: [tap, python] + # test_mode: [normal, legacy, paranoia] + test_mode: [normal, paranoia] + exclude: + - tests: tap + test_mode: paranoia + - tests: python + test_mode: normal + - tests: python + test_mode: legacy + fail-fast: false + name: ${{ format('Ptrack ({0}, PostgreSQL {1}, {2} tests, {3} mode)', matrix.os, matrix.pg_version, matrix.tests, matrix.test_mode) }} + container: + image: ${{ format('ghcr.io/postgres-dev/{0}:1.0', matrix.os) }} + env: + PG_BRANCH: ${{ format('REL_{0}_STABLE', matrix.pg_version) }} + PGDATA: $HOME/data + TEST_MODE: ${{ matrix.test_mode }} + options: --privileged + steps: + - name: Get Postgres sources + uses: actions/checkout@v3 + with: + repository: postgres/postgres + ref: ${{ format('REL_{0}_STABLE', matrix.pg_version) }} + path: postgres + - name: Get Ptrack sources + uses: actions/checkout@v3 + with: + path: ptrack + - name: Get Pg_probackup sources + uses: actions/checkout@v3 + with: + repository: postgrespro/pg_probackup + path: pg_probackup + - name: Apply ptrack patches + run: make patch top_builddir=../postgres + working-directory: ptrack + - name: Install Postgres + run: | + make install-postgres top_builddir=$GITHUB_WORKSPACE/postgres prefix=$HOME/pgsql && + echo $HOME/pgsql/bin >> $GITHUB_PATH + working-directory: ptrack + - name: Install Ptrack + run: make install USE_PGXS=1 PG_CPPFLAGS=-coverage SHLIB_LINK=-coverage + working-directory: ptrack + - name: Install Pg_probackup + run: make install-pg-probackup USE_PGXS=1 
top_srcdir=../postgres + working-directory: ptrack + shell: bash {0} + - name: Install additional packages + run: | + apt update && + apt install -y python3-pip python3-six python3-pytest python3-pytest-xdist curl && + pip3 install --no-input testgres + # All steps have been so far executed by root but ptrack tests run from an + # unprivileged user so change some permissions + - name: Adjust the permissions of ptrack test folders + run: | + mkdir pg_probackup/tests/tmp_dirs + chown -R "dev:" pg_probackup ptrack + - name: Test + run: make test-${{ matrix.tests }} USE_PGXS=1 + working-directory: ptrack + shell: runuser dev {0} + - name: Collect coverage results + run: make coverage + working-directory: ptrack + shell: runuser dev {0} + - name: Upload coverage results to Codecov + uses: codecov/codecov-action@v3 + with: + working-directory: ptrack + runs-on: ubuntu-latest diff --git a/.gitignore b/.gitignore index ffc5dc7..50591c5 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,5 @@ .deps *.so *.o -ptrack--2.0.sql - +Dockerfile +/tmp_check/ diff --git a/AUTHORS.md b/AUTHORS.md new file mode 100644 index 0000000..ed4d0eb --- /dev/null +++ b/AUTHORS.md @@ -0,0 +1,22 @@ +# Authors + +This list is sorted by the number of commits per contributor in _descending_ order. 
+ +Avatar|Contributor|Contributions +:-:|---|:-: +@ololobus|[@ololobus](https://github.com/ololobus)|62 +@funny-falcon|[@funny-falcon](https://github.com/funny-falcon)|15 +@alubennikova|[@alubennikova](https://github.com/alubennikova)|9 +@kulaginm|[@kulaginm](https://github.com/kulaginm)|5 +@daniel-95|[@daniel-95](https://github.com/daniel-95)|4 +@ziva777|[@ziva777](https://github.com/ziva777)|2 +@vegebird|[@vegebird](https://github.com/vegebird)|2 +@kovdb75|[@kovdb75](https://github.com/kovdb75)|1 +@MarinaPolyakova|[@MarinaPolyakova](https://github.com/MarinaPolyakova)|1 +@rzharkov|[@rzharkov](https://github.com/rzharkov)|1 +@vbwagner|[@vbwagner](https://github.com/vbwagner)|1 +@waaeer|[@waaeer](https://github.com/waaeer)|1 + +--- + +Auto-generated by [gaocegege/maintainer](https://github.com/maintainer-org/maintainer) on 2023-08-03. diff --git a/LICENSE b/LICENSE index a4d1d89..c1393f3 100644 --- a/LICENSE +++ b/LICENSE @@ -1,11 +1,19 @@ -ptrack is released under the PostgreSQL License, a liberal Open Source license, similar to the BSD or MIT licenses. +PostgreSQL License -Copyright (c) 2015-2020, Postgres Professional -Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group -Portions Copyright (c) 1994, The Regents of the University of California +Copyright (c) 2019-2023, Postgres Professional -Permission to use, copy, modify, and distribute this software and its documentation for any purpose, without fee, and without a written agreement is hereby granted, provided that the above copyright notice and this paragraph and the following two paragraphs appear in all copies. +Permission to use, copy, modify, and distribute this software and its +documentation for any purpose, without fee, and without a written agreement is +hereby granted, provided that the above copyright notice and this paragraph +and the following two paragraphs appear in all copies. 
-IN NO EVENT SHALL POSTGRES PROFESSIONAL BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF POSTGRES PROFESSIONAL HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +IN NO EVENT SHALL Postgres Professional BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, +SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING +OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF Postgres Professional +HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -POSTGRES PROFESSIONAL SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND POSTGRES PROFESSIONAL HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. +Postgres Professional SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, +AND Postgres Professional HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, +ENHANCEMENTS, OR MODIFICATIONS. 
diff --git a/Makefile b/Makefile index a424adb..499067a 100644 --- a/Makefile +++ b/Makefile @@ -1,27 +1,85 @@ + # contrib/ptrack/Makefile MODULE_big = ptrack -OBJS = ptrack.o $(WIN32RES) +OBJS = ptrack.o datapagemap.o engine.o $(WIN32RES) +PGFILEDESC = "ptrack - block-level incremental backup engine" + EXTENSION = ptrack -EXTVERSION = 2.0 -DATA = ptrack.sql -DATA_built = $(EXTENSION)--$(EXTVERSION).sql -PGFILEDESC = "ptrack - public API for internal ptrack engine" +EXTVERSION = 2.4 +DATA = ptrack--2.1.sql ptrack--2.0--2.1.sql ptrack--2.1--2.2.sql ptrack--2.2--2.3.sql \ + ptrack--2.3--2.4.sql -EXTRA_CLEAN = $(EXTENSION)--$(EXTVERSION).sql +TAP_TESTS = 1 + +# This line to link with pgport.lib on Windows compilation +# with Mkvcbuild.pm on PGv15+ +PG_LIBS_INTERNAL += $(libpq_pgport) -ifdef USE_PGXS PG_CONFIG ?= pg_config + +ifdef USE_PGXS PGXS := $(shell $(PG_CONFIG) --pgxs) include $(PGXS) else -subdir = contrib/ptrack top_builddir = ../.. +# Makefile.global is a build artifact and initially may not be available +ifneq ($(wildcard $(top_builddir)/src/Makefile.global), ) include $(top_builddir)/src/Makefile.global include $(top_srcdir)/contrib/contrib-global.mk endif +endif + +# Assuming make is started in the ptrack directory. Every recipe runs in its own shell, so each one has to cd to top_builddir itself. The paranoia patch is enabled by TEST_MODE=paranoia (MODE=paranoia is still accepted); the value must come from the environment or the command line because the conditional is evaluated while the Makefile is parsed. +patch: + @cd $(top_builddir) && \ + echo Applying the ptrack patch... && \ + git apply --3way -v $(CURDIR)/patches/${PG_BRANCH}-ptrack-core.diff +ifneq ($(filter paranoia,$(MODE) $(TEST_MODE)),) + @cd $(top_builddir) && echo Applying turn-off-hint-bits.diff for the paranoia mode... 
&& \ + git apply --3way -v $(CURDIR)/patches/turn-off-hint-bits.diff +endif + +NPROC ?= $(shell nproc) +prefix := $(abspath $(top_builddir)/pgsql) +TEST_MODE ?= normal +# Postgres Makefile skips some targets depending on the MAKELEVEL variable so +# reset it when calling install targets as if they are started directly from the +# command line +install-postgres: + @cd $(top_builddir) && \ + if [ "$(TEST_MODE)" = legacy ]; then \ + ./configure CFLAGS='-DEXEC_BACKEND' --disable-atomics --prefix=$(prefix) --enable-debug --enable-cassert --enable-depend --enable-tap-tests --quiet; \ + else \ + ./configure --prefix=$(prefix) --enable-debug --enable-cassert --enable-depend --enable-tap-tests; \ + fi && \ + $(MAKE) -sj $(NPROC) install MAKELEVEL=0 && \ + $(MAKE) -sj $(NPROC) -C contrib/ install MAKELEVEL=0 + +# Now when Postgres is built call all remainig targets with USE_PGXS=1 + +test-tap: +ifeq ($(TEST_MODE), legacy) + setarch x86_64 --addr-no-randomize $(MAKE) installcheck USE_PGXS=$(USE_PGXS) PG_CONFIG=$(PG_CONFIG) +else + $(MAKE) installcheck USE_PGXS=$(USE_PGXS) PG_CONFIG=$(PG_CONFIG) +endif + +pg_probackup_dir = ../pg_probackup +# Pg_probackup's Makefile uses top_srcdir when building via PGXS so set it when calling this target +# At the moment building pg_probackup with multiple threads may run some jobs too early and end with an error so do not set the -j option +install-pg-probackup: + $(MAKE) -C $(pg_probackup_dir) install USE_PGXS=$(USE_PGXS) PG_CONFIG=$(PG_CONFIG) top_srcdir=$(top_srcdir) -$(EXTENSION)--$(EXTVERSION).sql: ptrack.sql - cat $^ > $@ +test-python: + cd $(pg_probackup_dir); \ + env="PG_PROBACKUP_PTRACK=ON PG_CONFIG=$(PG_CONFIG)"; \ + if [ "$(TEST_MODE)" = normal ]; then \ + env="$$env PG_PROBACKUP_TEST_BASIC=ON"; \ + elif [ "$(TEST_MODE)" = paranoia ]; then \ + env="$$env PG_PROBACKUP_PARANOIA=ON"; \ + fi; \ + env $$env python3 -m pytest -svv$(if $(shell python3 -m pytest --help | grep '\-n '), -n $(NPROC))$(if $(TESTS), -k '$(TESTS)') 
tests/ptrack_test.py -temp-install: EXTRA_INSTALL=contrib/ptrack +coverage: + gcov *.c *.h diff --git a/README.md b/README.md index d39469a..1dd4a94 100644 --- a/README.md +++ b/README.md @@ -1,54 +1,246 @@ +[![Test](https://github.com/postgrespro/ptrack/actions/workflows/test.yml/badge.svg)](https://github.com/postgrespro/ptrack/actions/workflows/test.yml) +[![Codecov](https://codecov.io/gh/postgrespro/ptrack/branch/master/graph/badge.svg)](https://codecov.io/gh/postgrespro/ptrack) +[![GitHub release](https://img.shields.io/github/v/release/postgrespro/ptrack?include_prereleases)](https://github.com/postgrespro/ptrack/releases/latest) + # ptrack ## Overview -Ptrack is a fast block-level incremental backup engine for PostgreSQL. Currently `ptrack` codebase is split approximately 50%/50% between PostgreSQL core patch and extension. All public SQL API methods are placed in the `ptrack` extension, while the main engine is still in core. +Ptrack is a block-level incremental backup engine for PostgreSQL. You can [effectively use](https://postgrespro.github.io/pg_probackup/#pbk-setting-up-ptrack-backups) `ptrack` engine for taking incremental backups with [pg_probackup](https://github.com/postgrespro/pg_probackup) backup and recovery manager for PostgreSQL. + +It is designed to allow false positives (i.e. block/page is marked in the `ptrack` map, but actually has not been changed), but to never allow false negatives (i.e. loosing any `PGDATA` changes, excepting hint-bits). + +Currently, `ptrack` codebase is split between small PostgreSQL core patch and extension. All public SQL API methods and main engine are placed in the `ptrack` extension, while the core patch contains only certain hooks and modifies binary utilities to ignore `ptrack.map.*` files. 
+ +This extension is compatible with PostgreSQL [11](patches/REL_11_STABLE-ptrack-core.diff), [12](patches/REL_12_STABLE-ptrack-core.diff), [13](patches/REL_13_STABLE-ptrack-core.diff), [14](patches/REL_14_STABLE-ptrack-core.diff), [15](patches/REL_15_STABLE-ptrack-core.diff). ## Installation -1) Get latest PostgreSQL sources: +1) Specify the PostgreSQL branch to work with: + +```shell +export PG_BRANCH=REL_15_STABLE +``` + +2) Get the latest PostgreSQL sources: + +```shell +git clone https://github.com/postgres/postgres.git -b $PG_BRANCH +``` + +3) Get the latest `ptrack` sources: + +```shell +git clone https://github.com/postgrespro/ptrack.git postgres/contrib/ptrack +``` + +4) Change to the `ptrack` directory: + +```shell +cd postgres/contrib/ptrack +``` + +5) Apply the PostgreSQL core patch: ```shell -git clone https://github.com/postgres/postgres.git -b REL_12_STABLE && cd postgres +make patch ``` -2) Apply PostgreSQL core patch: +6) Compile and install PostgreSQL: ```shell -git apply ptrack/patches/ptrack-2.0-core.diff +make install-postgres prefix=$PWD/pgsql # or some other prefix of your choice ``` -3) Compile and install PostgreSQL +7) Add the newly created binaries to the PATH: -4) Set `ptrack_map_size` (in MB) +```shell +export PATH=$PWD/pgsql/bin:$PATH +``` + +8) Compile and install `ptrack`: ```shell -echo 'ptrack_map_size = 64' >> postgres_data/postgresql.conf +make install USE_PGXS=1 ``` -5) Compile and install `ptrack` extension +9) Set `ptrack.map_size` (in MB): ```shell -USE_PGXS=1 make -C /path/to/ptrack/ install +echo "shared_preload_libraries = 'ptrack'" >> /postgresql.conf +echo "ptrack.map_size = 64" >> /postgresql.conf ``` -6) Run PostgreSQL and create `ptrack` extension +10) Run PostgreSQL and create the `ptrack` extension: ```sql -CREATE EXTENSION ptrack; +postgres=# CREATE EXTENSION ptrack; ``` +## Configuration + +The only one configurable option is `ptrack.map_size` (in MB). Default is `0`, which means `ptrack` is turned off. 
In order to reduce number of false positives it is recommended to set `ptrack.map_size` to `1 / 1000` of expected `PGDATA` size (i.e. `1000` for a 1 TB database). + +To disable `ptrack` and clean up all remaining service files set `ptrack.map_size` to `0`. + ## Public SQL API - * ptrack_version() --- returns ptrack version string (2.0 currently). - * pg_ptrack_get_pagemapset('LSN') --- returns a set of changed data files with bitmaps of changed blocks since specified LSN. - * pg_ptrack_control_lsn() --- returns LSN of the last ptrack map initialization. - * pg_ptrack_get_block --- returns a requested block of relation. + * ptrack_version() — returns ptrack version string. + * ptrack_init_lsn() — returns LSN of the last ptrack map initialization. + * ptrack_get_pagemapset(start_lsn pg_lsn) — returns a set of changed data files with a number of changed blocks and their bitmaps since specified `start_lsn`. + * ptrack_get_change_stat(start_lsn pg_lsn) — returns statistic of changes (number of files, pages and size in MB) since specified `start_lsn`. 
+ +Usage example: + +```sql +postgres=# SELECT ptrack_version(); + ptrack_version +---------------- + 2.4 +(1 row) + +postgres=# SELECT ptrack_init_lsn(); + ptrack_init_lsn +----------------- + 0/1814408 +(1 row) + +postgres=# SELECT * FROM ptrack_get_pagemapset('0/185C8C0'); + path | pagecount | pagemap +---------------------+-----------+---------------------------------------- + base/16384/1255 | 3 | \x001000000005000000000000 + base/16384/2674 | 3 | \x0000000900010000000000000000 + base/16384/2691 | 1 | \x00004000000000000000000000 + base/16384/2608 | 1 | \x000000000000000400000000000000000000 + base/16384/2690 | 1 | \x000400000000000000000000 +(5 rows) + +postgres=# SELECT * FROM ptrack_get_change_stat('0/285C8C8'); + files | pages | size, MB +-------+-------+------------------------ + 20 | 25 | 0.19531250000000000000 +(1 row) +``` + +## Upgrading + +Usually, you have to only install new version of `ptrack` and do `ALTER EXTENSION ptrack UPDATE;`. However, some specific actions may be required as well: + +#### Upgrading from 2.0.0 to 2.1.*: + +* Put `shared_preload_libraries = 'ptrack'` into `postgresql.conf`. +* Rename `ptrack_map_size` to `ptrack.map_size`. +* Do `ALTER EXTENSION ptrack UPDATE;`. +* Restart your server. + +#### Upgrading from 2.1.* to 2.2.*: + +Since version 2.2 we use a different algorithm for tracking changed pages. Thus, data recorded in the `ptrack.map` using pre 2.2 versions of `ptrack` is incompatible with newer versions. After extension upgrade and server restart old `ptrack.map` will be discarded with `WARNING` and initialized from the scratch. + +#### Upgrading from 2.2.* to 2.3.*: + +* Stop your server +* Update ptrack binaries +* Remove global/ptrack.map.mmap if it exist in server data directory +* Start server +* Do `ALTER EXTENSION ptrack UPDATE;`. + +#### Upgrading from 2.3.* to 2.4.*: + +* Stop your server +* Update ptrack binaries +* Start server +* Do `ALTER EXTENSION ptrack UPDATE;`. + +## Limitations + +1. 
You can only use `ptrack` safely with `wal_level >= 'replica'`. Otherwise, you can lose tracking of some changes if crash-recovery occurs, since [certain commands are designed not to write WAL at all if wal_level is minimal](https://www.postgresql.org/docs/12/populate.html#POPULATE-PITR), but we only durably flush `ptrack` map at checkpoint time. + +2. The only production-ready backup utility that fully supports `ptrack` is [pg_probackup](https://github.com/postgrespro/pg_probackup). + +3. You cannot resize `ptrack` map at runtime, only on postmaster start. Also, you will lose all tracked changes, so it is recommended to do so in the maintenance window and accompany this operation with a full backup. + +4. You will need up to `ptrack.map_size * 2` of additional disk space, since `ptrack` uses an additional temporary file for durability purposes. See [Architecture section](#architecture) for details. + +## Benchmarks + +Briefly, an overhead of using `ptrack` on TPS usually does not exceed a couple of percent (~1-3%) for a database of dozens to hundreds of gigabytes in size, while the backup time scales down linearly with backup size with a coefficient ~1. It means that an incremental `ptrack` backup of a database with only 20% of changed pages will be 5 times faster than a full backup. More details [here](benchmarks). ## Architecture -TBA +We use a single shared hash table in `ptrack`. Due to the fixed size of the map there may be false positives (when some block is marked as changed without being actually modified), but not false negative results. However, these false positives may be completely eliminated by setting a high enough `ptrack.map_size`. + +All reads/writes are made using atomic operations on `uint64` entries, so the map is completely lockless during the normal PostgreSQL operation. 
Because we do not use locks for read/write access, `ptrack` keeps the map (`ptrack.map`) from the last checkpoint intact and uses up to 1 additional temporary file: + +* temporary file `ptrack.map.tmp` to durably replace `ptrack.map` during checkpoint. + +The map is written to disk at the end of checkpoint atomically block by block involving the CRC32 checksum calculation that is checked on the next whole map re-read after crash-recovery or restart. + +To gather the whole changeset of modified blocks in `ptrack_get_pagemapset()` we walk the entire `PGDATA` (`base/**/*`, `global/*`, `pg_tblspc/**/*`) and verify using the map whether each block of each relation was modified since the specified LSN or not. + +## Contribution + +Feel free to [send a pull request](https://github.com/postgrespro/ptrack/compare), [create an issue](https://github.com/postgrespro/ptrack/issues/new) or [reach us by e-mail](mailto:team-wd40@lists.postgrespro.ru?subject=[GitHub]%20Ptrack) if you are interested in `ptrack`. + +## Tests + +All changes of the source code in this repository are checked by CI - see commit statuses and the project status badge. You can also run tests locally by executing a few Makefile targets. 
+ +### Prerequisites + +To run Python tests install the following packages: + +OS packages: + - python3-pip + - python3-six + - python3-pytest + - python3-pytest-xdist + +PIP packages: + - testgres + +For example, for Ubuntu: + +```shell +sudo apt update +sudo apt install python3-pip python3-six python3-pytest python3-pytest-xdist +sudo pip3 install testgres +``` + +### Testing + +Install PostgreSQL and ptrack as described in [Installation](#installation), install the testing prerequisites, then do (assuming the current directory is `ptrack`): +```shell +git clone https://github.com/postgrespro/pg_probackup.git ../pg_probackup # clone the repository into postgres/contrib/pg_probackup +# remember to export PATH=/path/to/pgsql/bin:$PATH +make install-pg-probackup USE_PGXS=1 top_srcdir=../.. +make test-tap USE_PGXS=1 +make test-python +``` + +If `pg_probackup` is not located in `postgres/contrib` then additionally specify the path to the `pg_probackup` directory when building `pg_probackup`: +```shell +make install-pg-probackup USE_PGXS=1 top_srcdir=/path/to/postgres pg_probackup_dir=/path/to/pg_probackup +``` + +You can use a public Docker image which already has the necessary build environment (but not the testing prerequisites): + +```shell +docker run -e USER_ID=`id -u` -it -v $PWD:/work --name=ptrack ghcr.io/postgres-dev/ubuntu-22.04:1.0 +dev@a033797d2f73:~$ +``` + +## Environment variables + +| Variable | Possible values | Required | Default value | Description | +| - | - | - | - | - | +| NPROC | An integer greater than 0 | No | Output of `nproc` | The number of threads used for building and running tests | +| PG_CONFIG | File path | No | pg_config (from the PATH) | The path to the `pg_config` binary | +| TESTS | A Pytest filter expression | No | Not set (run all Python tests) | A filter to include only selected tests into the run. See the Pytest `-k` option for more information. 
This variable is only applicable to `test-python` for the tests located in [tests](https://github.com/postgrespro/pg_probackup/tree/master/tests). | +| TEST_MODE | normal, legacy, paranoia | No | normal | The "legacy" mode runs tests in an environment similar to a 32-bit Windows system. This mode is only applicable to `test-tap`. The "paranoia" mode compares the checksums of each block of the database catalog (PGDATA) contents before making a backup and after the restoration. This mode is only applicable to `test-python`.| -## Roadmap +### TODO -The main goal currently is to move as much `ptrack` functionality into the extension as possible and leave only certain requred hooks as core patch. +* Should we introduce `ptrack.map_path` to allow `ptrack` service files storage outside of `PGDATA`? Doing that we will avoid patching PostgreSQL binary utilities to ignore `ptrack.map.*` files. +* Can we resize `ptrack` map on restart but keep the previously tracked changes? +* Can we write a formal proof, that we never loose any modified page with `ptrack`? With TLA+? diff --git a/benchmarks/README.md b/benchmarks/README.md new file mode 100644 index 0000000..75efe50 --- /dev/null +++ b/benchmarks/README.md @@ -0,0 +1,97 @@ +# Ptrack benchmarks + +## Runtime overhead + +First target was to measure `ptrack` overhead influence on TPS due to marking modified pages in the map in memory. We used PostgreSQL 12 (REL_12_STABLE) cluster of approximately 1 GB size, initialized with `pgbench` on a `tmpfs` partition: + +```sh +pgbench -i -s 133 +``` + +Default `pgbench` transaction script [were modified](pgb.sql) to exclude `pgbench_tellers` and `pgbench_branches` updates in order to lower lock contention and make `ptrack` overhead more visible. 
So `pgbench` was invoked as following: + +```sh +pgbench -s133 -c40 -j1 -n -P15 -T300 -f pgb.sql +``` + +Results: + +| ptrack.map_size, MB | 0 (turned off) | 32 | 64 | 256 | 512 | 1024 | +|---------------------|----------------|----|----|-----|-----|------| +| TPS | 16900 | 16890 | 16855 | 16468 | 16490 | 16220 | + +TPS fluctuates in a several percent range around 16500 on the used machine, but in average `ptrack` overhead does not exceed 1-3% for any reasonable `ptrack.map_size`. It only becomes noticeable closer to 1 GB `ptrack.map_size` (~3-4%), which is enough to track changes in the database of up to 1 TB size without false positives. + + + + +## Backups speedup + +To test incremental backups speed a fresh cluster were initialized with following DDL: + +```sql +CREATE TABLE large_test (num1 bigint, num2 double precision, num3 double precision); +CREATE TABLE large_test2 (num1 bigint, num2 double precision, num3 double precision); +``` + +These relations were populated with approximately 2 GB of data that way: + +```sql +INSERT INTO large_test (num1, num2, num3) +SELECT s, random(), random()*142 +FROM generate_series(1, 20000000) s; +``` + +Then a part of one relation was touched with a following query: + +```sql +UPDATE large_test2 SET num3 = num3 + 1 WHERE num1 < 20000000 / 5; +``` + +After that, incremental `ptrack` backups were taken with `pg_probackup` followed by full backups. Tests show that `ptrack_backup_time / full_backup_time ~= ptrack_backup_size / full_backup_size`, i.e. if only 20% of data were modified, then `ptrack` backup will be 5 times faster than full backup. Thus, the overhead of building `ptrack` map during backup is minimal. 
Example: + +```sh +21:02:43 postgres:~/dev/ptrack_test$ time pg_probackup backup -B $(pwd)/backup --instance=node -p5432 -b ptrack --no-sync --stream +INFO: Backup start, pg_probackup version: 2.3.1, instance: node, backup ID: QAA89O, backup mode: PTRACK, wal mode: STREAM, remote: false, compress-algorithm: none, compress-level: 1 +INFO: Parent backup: QAA7FL +INFO: PGDATA size: 2619MB +INFO: Extracting pagemap of changed blocks +INFO: Pagemap successfully extracted, time elapsed: 0 sec +INFO: Start transferring data files +INFO: Data files are transferred, time elapsed: 3s +INFO: wait for pg_stop_backup() +INFO: pg_stop backup() successfully executed +WARNING: Backup files are not synced to disk +INFO: Validating backup QAA89O +INFO: Backup QAA89O data files are valid +INFO: Backup QAA89O resident size: 632MB +INFO: Backup QAA89O completed + +real 0m11.574s +user 0m1.924s +sys 0m1.100s + +21:20:23 postgres:~/dev/ptrack_test$ time pg_probackup backup -B $(pwd)/backup --instance=node -p5432 -b full --no-sync --stream +INFO: Backup start, pg_probackup version: 2.3.1, instance: node, backup ID: QAA8A6, backup mode: FULL, wal mode: STREAM, remote: false, compress-algorithm: none, compress-level: 1 +INFO: PGDATA size: 2619MB +INFO: Start transferring data files +INFO: Data files are transferred, time elapsed: 32s +INFO: wait for pg_stop_backup() +INFO: pg_stop backup() successfully executed +WARNING: Backup files are not synced to disk +INFO: Validating backup QAA8A6 +INFO: Backup QAA8A6 data files are valid +INFO: Backup QAA8A6 resident size: 2653MB +INFO: Backup QAA8A6 completed + +real 0m42.629s +user 0m8.904s +sys 0m11.960s +``` \ No newline at end of file diff --git a/benchmarks/pgb.sql b/benchmarks/pgb.sql new file mode 100644 index 0000000..34211fc --- /dev/null +++ b/benchmarks/pgb.sql @@ -0,0 +1,9 @@ +\set aid random(1, 100000 * :scale) +\set bid random(1, 1 * :scale) +\set tid random(1, 10 * :scale) +\set delta random(-5000, 5000) +BEGIN; +UPDATE 
pgbench_accounts SET abalance = abalance + :delta WHERE aid = :aid; +SELECT abalance FROM pgbench_accounts WHERE aid = :aid; +INSERT INTO pgbench_history (tid, bid, aid, delta, mtime) VALUES (:tid, :bid, :aid, :delta, CURRENT_TIMESTAMP); +END; diff --git a/codecov.yml b/codecov.yml new file mode 100644 index 0000000..00b744e --- /dev/null +++ b/codecov.yml @@ -0,0 +1,16 @@ +codecov: + notify: + # must be equal to the total number of parallel jobs in a CI pipeline + # (Postgres versions x test types x test modes x OSes minus excluded + # combinations) + after_n_builds: 10 + +# datapagemap.c/.h are copied from Postgres, so let's remove it +# from report. Otherwise, we would have to remove some currently +# unused methods to do not spoil the report. +ignore: + - "**/datapagemap.*" + - "**/*datapagemap*" + - "datapagemap.*" + - "datapagemap.c" + - "datapagemap.h" diff --git a/datapagemap.c b/datapagemap.c new file mode 100644 index 0000000..fb0da97 --- /dev/null +++ b/datapagemap.c @@ -0,0 +1,126 @@ +/*------------------------------------------------------------------------- + * + * datapagemap.c + * A data structure for keeping track of data pages that have changed. + * + * This is a fairly simple bitmap. + * + * Copyright (c) 2013-2020, PostgreSQL Global Development Group + * Copyright (c) 2019-2020, Postgres Professional + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "datapagemap.h" + +struct datapagemap_iterator +{ + datapagemap_t *map; + BlockNumber nextblkno; +}; + +/* + * Add a block to the bitmap. + */ +void +datapagemap_add(datapagemap_t *map, BlockNumber blkno) +{ + int offset; + int bitno; + + offset = blkno / 8; + bitno = blkno % 8; + + /* enlarge or create bitmap if needed */ + if (map->bitmapsize <= offset) + { + int oldsize = map->bitmapsize; + int newsize; + + /* + * The minimum to hold the new bit is offset + 1. 
But add some + * headroom, so that we don't need to repeatedly enlarge the bitmap in + * the common case that blocks are modified in order, from beginning + * of a relation to the end. + */ + newsize = offset + 1; + newsize += 10; + + if (map->bitmap != NULL) + map->bitmap = repalloc(map->bitmap, newsize); + else + map->bitmap = palloc(newsize); + + /* zero out the newly allocated region */ + memset(&map->bitmap[oldsize], 0, newsize - oldsize); + + map->bitmapsize = newsize; + } + + /* Set the bit */ + map->bitmap[offset] |= (1 << bitno); +} + +/* + * Start iterating through all entries in the page map. + * + * After datapagemap_iterate, call datapagemap_next to return the entries, + * until it returns false. After you're done, use pfree() to destroy the + * iterator (it is allocated with palloc()). + */ +datapagemap_iterator_t * +datapagemap_iterate(datapagemap_t *map) +{ + datapagemap_iterator_t *iter; + + iter = palloc(sizeof(datapagemap_iterator_t)); + iter->map = map; + iter->nextblkno = 0; + + return iter; +} + +bool +datapagemap_next(datapagemap_iterator_t *iter, BlockNumber *blkno) +{ + datapagemap_t *map = iter->map; + + for (;;) + { + BlockNumber blk = iter->nextblkno; + int nextoff = blk / 8; + int bitno = blk % 8; + + if (nextoff >= map->bitmapsize) + break; + + iter->nextblkno++; + + if (map->bitmap[nextoff] & (1 << bitno)) + { + *blkno = blk; + return true; + } + } + + /* no more set bits in this bitmap. */ + return false; +} + +/* + * A debugging aid. Prints out the contents of the page map. 
+ */ +void +datapagemap_print(datapagemap_t *map) +{ + datapagemap_iterator_t *iter; + BlockNumber blocknum; + + iter = datapagemap_iterate(map); + while (datapagemap_next(iter, &blocknum)) + elog(DEBUG3, "block %u", blocknum); + + pfree(iter); +} diff --git a/datapagemap.h b/datapagemap.h new file mode 100644 index 0000000..9b730da --- /dev/null +++ b/datapagemap.h @@ -0,0 +1,28 @@ +/*------------------------------------------------------------------------- + * + * datapagemap.h + * + * Copyright (c) 2013-2019, PostgreSQL Global Development Group + * + *------------------------------------------------------------------------- + */ +#ifndef DATAPAGEMAP_H +#define DATAPAGEMAP_H + +#include "storage/block.h" + +struct datapagemap +{ + char *bitmap; + int bitmapsize; +}; + +typedef struct datapagemap datapagemap_t; +typedef struct datapagemap_iterator datapagemap_iterator_t; + +extern void datapagemap_add(datapagemap_t *map, BlockNumber blkno); +extern datapagemap_iterator_t *datapagemap_iterate(datapagemap_t *map); +extern bool datapagemap_next(datapagemap_iterator_t *iter, BlockNumber *blkno); +extern void datapagemap_print(datapagemap_t *map); + +#endif /* DATAPAGEMAP_H */ diff --git a/engine.c b/engine.c new file mode 100644 index 0000000..dfd7c84 --- /dev/null +++ b/engine.c @@ -0,0 +1,680 @@ +/* + * engine.c + * Block level incremental backup engine core + * + * Copyright (c) 2019-2022, Postgres Professional + * + * IDENTIFICATION + * ptrack/engine.c + * + * INTERFACE ROUTINES (PostgreSQL side) + * ptrackMapInit() --- allocate new shared ptrack_map + * ptrackCleanFiles() --- remove ptrack files + * assign_ptrack_map_size() --- ptrack_map_size GUC assign callback + * ptrack_walkdir() --- walk directory and mark all blocks of all + * data files in ptrack_map + * ptrack_mark_block() --- mark single page in ptrack_map + * + */ + +#include "postgres.h" + +#include +#include + +#ifndef WIN32 +#include +#endif + +#include "access/htup_details.h" +#include 
"access/parallel.h" +#include "access/xlog.h" +#if PG_VERSION_NUM >= 150000 +#include "access/xlogrecovery.h" +#include "storage/fd.h" +#endif +#include "catalog/pg_tablespace.h" +#include "miscadmin.h" +#include "port/pg_crc32c.h" +#include "storage/copydir.h" +#if PG_VERSION_NUM >= 120000 +#include "storage/md.h" +#include "storage/sync.h" +#endif +#include "storage/reinit.h" +#include "storage/smgr.h" +#include "utils/array.h" +#include "utils/builtins.h" +#include "utils/pg_lsn.h" + +#include "ptrack.h" +#include "engine.h" + +/* + * Check that path is accessible by us and return true if it is + * not a directory. + */ +static bool +ptrack_file_exists(const char *path) +{ + struct stat st; + + Assert(path != NULL); + + if (stat(path, &st) == 0) + return S_ISDIR(st.st_mode) ? false : true; + else if (!(errno == ENOENT || errno == ENOTDIR || errno == EACCES)) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not access file \"%s\": %m", path))); + + return false; +} + +/* + * Write a piece of ptrack map to file and update CRC32 value. + */ +static void +ptrack_write_chunk(int fd, pg_crc32c *crc, char *chunk, size_t size) +{ + COMP_CRC32C(*crc, (char *) chunk, size); + + if (write(fd, chunk, size) != size) + { + /* If write didn't set errno, assume problem is no disk space */ + if (errno == 0) + errno = ENOSPC; + + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not write file \"%s\": %m", PTRACK_PATH_TMP))); + } +} + +/* + * Delete ptrack files when ptrack is disabled. + * + * This is performed by postmaster at start, + * so that there are no concurrent delete issues. 
+ */
+void
+ptrackCleanFiles(void)
+{
+	char		ptrack_path[MAXPGPATH];
+	char		ptrack_path_tmp[MAXPGPATH];
+
+	/* Bounded formatting: DataDir plus the suffix must not overrun the buffers */
+	snprintf(ptrack_path, sizeof(ptrack_path), "%s/%s", DataDir, PTRACK_PATH);
+	snprintf(ptrack_path_tmp, sizeof(ptrack_path_tmp), "%s/%s", DataDir, PTRACK_PATH_TMP);
+
+	elog(DEBUG1, "ptrack: clean map files");
+
+	if (ptrack_file_exists(ptrack_path_tmp))
+		durable_unlink(ptrack_path_tmp, LOG);
+
+	if (ptrack_file_exists(ptrack_path))
+		durable_unlink(ptrack_path, LOG);
+}
+
+/*
+ * Read ptrack map file into shared memory pointed by ptrack_map.
+ * This function is called only at startup,
+ * so data is read directly (without synchronization).
+ *
+ * Returns true if the whole file was read and its magic, format version and
+ * CRC are all valid. Returns false (after a WARNING) on a read error or if
+ * any of those checks fails, so that the caller can discard the file.
+ * Failure to open the file and an unexpected end of file raise ERROR.
+ */
+static bool
+ptrackMapReadFromFile(const char *ptrack_path)
+{
+	elog(DEBUG1, "ptrack read map");
+
+	/* Do actual file read */
+	{
+		int			ptrack_fd;
+		size_t		readed;
+
+		ptrack_fd = BasicOpenFile(ptrack_path, O_RDWR | PG_BINARY);
+
+		if (ptrack_fd < 0)
+			elog(ERROR, "ptrack read map: failed to open map file \"%s\": %m", ptrack_path);
+
+		readed = 0;
+		do
+		{
+			ssize_t		last_readed;
+
+			/*
+			 * Try to read as much as possible
+			 * (linux guaranteed only 0x7ffff000 bytes in one read
+			 * operation, see read(2))
+			 */
+			last_readed = read(ptrack_fd, (char *) ptrack_map + readed, PtrackActualSize - readed);
+
+			if (last_readed > 0)
+			{
+				readed += last_readed;
+			}
+			else if (last_readed == 0)
+			{
+				/*
+				 * We don't try to read more than PtrackActualSize and
+				 * file size was already checked in ptrackMapInit()
+				 */
+				/* Do not leave the descriptor open behind the error */
+				close(ptrack_fd);
+				elog(ERROR, "ptrack read map: unexpected end of file while reading map file \"%s\", expected to read %zu, but read only %zu bytes",
+							  ptrack_path, (size_t)PtrackActualSize, readed);
+			}
+			else if (last_readed < 0 && errno != EINTR)
+			{
+				ereport(WARNING,
+						(errcode_for_file_access(),
+						 errmsg("ptrack read map: could not read map file \"%s\": %m", ptrack_path)));
+				close(ptrack_fd);
+				return false;
+			}
+			/* read() interrupted by a signal (EINTR): just retry */
+		} while (readed < PtrackActualSize);
+
+		close(ptrack_fd);
+	}
+
+	/*
+	 * Check PTRACK_MAGIC.
+	 *
+	 * The magic is a fixed-size field that was just read from disk, so compare
+	 * exactly PTRACK_MAGIC_SIZE bytes instead of trusting it to be
+	 * NUL-terminated (it is also written with memcpy of that size).
+	 */
+	if (memcmp(ptrack_map->magic, PTRACK_MAGIC, PTRACK_MAGIC_SIZE) != 0)
+	{
+		elog(WARNING, "ptrack read map: wrong map format of file \"%s\"", ptrack_path);
+		return false;
+	}
+
+	/* Check ptrack version inside old ptrack map */
+	if (ptrack_map->version_num != PTRACK_MAP_FILE_VERSION_NUM)
+	{
+		ereport(WARNING,
+				(errcode(ERRCODE_DATA_CORRUPTED),
+				 errmsg("ptrack read map: map format version %d in the file \"%s\" is incompatible with file format of extension %d",
+						ptrack_map->version_num, ptrack_path, PTRACK_MAP_FILE_VERSION_NUM),
+				 errdetail("Deleting file \"%s\" and reinitializing ptrack map.", ptrack_path)));
+		return false;
+	}
+
+	/* Check CRC */
+	{
+		pg_crc32c	crc;
+		pg_crc32c  *file_crc;
+
+		/* CRC covers everything in the map up to the stored CRC itself */
+		INIT_CRC32C(crc);
+		COMP_CRC32C(crc, (char *) ptrack_map, PtrackCrcOffset);
+		FIN_CRC32C(crc);
+
+		file_crc = (pg_crc32c *) ((char *) ptrack_map + PtrackCrcOffset);
+
+		/*
+		 * Read ptrack map values without atomics during initialization, since
+		 * postmaster is the only user right now.
+		 */
+		elog(DEBUG1, "ptrack read map: crc %u, file_crc %u, init_lsn %X/%X",
+			 crc, *file_crc, (uint32) (ptrack_map->init_lsn.value >> 32), (uint32) ptrack_map->init_lsn.value);
+
+		if (!EQ_CRC32C(*file_crc, crc))
+		{
+			ereport(WARNING,
+					(errcode(ERRCODE_DATA_CORRUPTED),
+					 errmsg("ptrack read map: incorrect checksum of file \"%s\"", ptrack_path),
+					 errdetail("Deleting file \"%s\" and reinitializing ptrack map.", ptrack_path)));
+			return false;
+		}
+	}
+
+	return true;
+}
+
+/*
+ * Read PTRACK_PATH file into already allocated shared memory, check header and checksum
+ * or create new file, if there was no PTRACK_PATH file on disk.
+ */ +void +ptrackMapInit(void) +{ + char ptrack_path[MAXPGPATH]; + struct stat stat_buf; + bool is_new_map = true; + + elog(DEBUG1, "ptrack init"); + + if (ptrack_map_size == 0) + return; + + sprintf(ptrack_path, "%s/%s", DataDir, PTRACK_PATH); + + if (stat(ptrack_path, &stat_buf) == 0) + { + elog(DEBUG3, "ptrack init: map \"%s\" detected, trying to load", ptrack_path); + if (stat_buf.st_size != PtrackActualSize) + { + elog(WARNING, "ptrack init: unexpected \"%s\" file size %zu != " UINT64_FORMAT ", deleting", + ptrack_path, (Size) stat_buf.st_size, PtrackActualSize); + durable_unlink(ptrack_path, LOG); + } + else if (ptrackMapReadFromFile(ptrack_path)) + { + is_new_map = false; + } + else + { + /* + * ptrackMapReadFromFile failed + * this can be crc mismatch, version mismatch and other errors + * We treat it as non fatal and create new map in memory, + * that will be written on disk on checkpoint + */ + elog(WARNING, "ptrack init: broken map file \"%s\", deleting", + ptrack_path); + durable_unlink(ptrack_path, LOG); + } + } + + /* + * Initialyze memory for new map + */ + if (is_new_map) + { + memcpy(ptrack_map->magic, PTRACK_MAGIC, PTRACK_MAGIC_SIZE); + ptrack_map->version_num = PTRACK_MAP_FILE_VERSION_NUM; + ptrack_map->init_lsn.value = InvalidXLogRecPtr; + /* + * Fill entries with InvalidXLogRecPtr + * (InvalidXLogRecPtr is actually 0) + */ + memset(ptrack_map->entries, 0, PtrackContentNblocks * sizeof(pg_atomic_uint64)); + /* + * Last part of memory representation of ptrack_map (crc) is actually unused + * so leave it as it is + */ + } +} + +/* + * Write content of ptrack_map to file. + */ +void +ptrackCheckpoint(void) +{ + int ptrack_tmp_fd; + pg_crc32c crc; + char ptrack_path[MAXPGPATH]; + char ptrack_path_tmp[MAXPGPATH]; + XLogRecPtr init_lsn; + pg_atomic_uint64 buf[PTRACK_BUF_SIZE]; + struct stat stat_buf; + uint64 i = 0; + uint64 j = 0; + + elog(DEBUG1, "ptrack checkpoint"); + + /* + * Set the buffer to all zeros for sanity. 
Otherwise, if atomics + * simulation via spinlocks is used (e.g. with --disable-atomics) we could + * write garbage into the sema field of pg_atomic_uint64, which will cause + * spinlocks to stuck after restart. + */ + MemSet(buf, 0, sizeof(buf)); + + /* Delete ptrack_map and all related files, if ptrack was switched off */ + if (ptrack_map_size == 0) + { + return; + } + else if (ptrack_map == NULL) + elog(ERROR, "ptrack checkpoint: map is not loaded at checkpoint time"); + + sprintf(ptrack_path_tmp, "%s/%s", DataDir, PTRACK_PATH_TMP); + sprintf(ptrack_path, "%s/%s", DataDir, PTRACK_PATH); + + elog(DEBUG1, "ptrack checkpoint: started"); + + /* Map content is protected with CRC */ + INIT_CRC32C(crc); + + ptrack_tmp_fd = BasicOpenFile(ptrack_path_tmp, + O_CREAT | O_TRUNC | O_WRONLY | PG_BINARY); + + if (ptrack_tmp_fd < 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("ptrack checkpoint: could not create file \"%s\": %m", ptrack_path_tmp))); + + /* + * We are writing ptrack map values to file, but we want to simply map it + * into the memory with mmap after a crash/restart. That way, we have to + * write values taking into account all paddings/alignments. + * + * Write both magic and version_num at once. + */ + + /* + * Previously we read from the field magic, now we read from the beginning + * of the structure PtrackMapHdr. Make sure nothing has changed since then. 
+ */ + StaticAssertStmt( + offsetof(PtrackMapHdr, magic) == 0, + "old write format for PtrackMapHdr.magic and PtrackMapHdr.version_num " + "is not upward-compatible"); + + ptrack_write_chunk(ptrack_tmp_fd, &crc, (char *) ptrack_map, + offsetof(PtrackMapHdr, init_lsn)); + + init_lsn = pg_atomic_read_u64(&ptrack_map->init_lsn); + + /* Set init_lsn during checkpoint if it is not set yet */ + if (init_lsn == InvalidXLogRecPtr) + { + XLogRecPtr new_init_lsn; + + if (RecoveryInProgress()) + new_init_lsn = GetXLogReplayRecPtr(NULL); + else + new_init_lsn = GetXLogInsertRecPtr(); + + pg_atomic_write_u64(&ptrack_map->init_lsn, new_init_lsn); + init_lsn = new_init_lsn; + } + + /* Put init_lsn in the same buffer */ + buf[j].value = init_lsn; + j++; + + /* + * Iterate over ptrack map actual content and sync it to file. It's + * essential to read each element atomically to avoid partial reads, since + * map can be updated concurrently without any lock. + */ + while (i < PtrackContentNblocks) + { + XLogRecPtr lsn; + + /* + * We store LSN values as pg_atomic_uint64 in the ptrack map, but + * pg_atomic_read_u64() returns uint64. That way, we have to put this + * lsn into the buffer array of pg_atomic_uint64's. We are the only + * one who write into this buffer, so we do it without locks. + * + * TODO: is it safe and can we do any better? + */ + lsn = pg_atomic_read_u64(&ptrack_map->entries[i]); + buf[j].value = lsn; + + i++; + j++; + + if (j == PTRACK_BUF_SIZE) + { + size_t writesz = sizeof(buf); /* Up to ~2 GB for buffer size seems + * to be more than enough, so never + * going to overflow. */ + + /* + * We should not have any alignment issues here, since sizeof() + * takes into account all paddings for us. 
+ */ + ptrack_write_chunk(ptrack_tmp_fd, &crc, (char *) buf, writesz); + elog(DEBUG5, "ptrack checkpoint: i " UINT64_FORMAT ", j " UINT64_FORMAT ", writesz %zu PtrackContentNblocks " UINT64_FORMAT, + i, j, writesz, (uint64) PtrackContentNblocks); + + j = 0; + } + } + + /* Write if anything left */ + if ((i + 1) % PTRACK_BUF_SIZE != 0) + { + size_t writesz = sizeof(pg_atomic_uint64) * j; + + ptrack_write_chunk(ptrack_tmp_fd, &crc, (char *) buf, writesz); + elog(DEBUG5, "ptrack checkpoint: final i " UINT64_FORMAT ", j " UINT64_FORMAT ", writesz %zu PtrackContentNblocks " UINT64_FORMAT, + i, j, writesz, (uint64) PtrackContentNblocks); + } + + FIN_CRC32C(crc); + + if (write(ptrack_tmp_fd, &crc, sizeof(crc)) != sizeof(crc)) + { + /* If write didn't set errno, assume problem is no disk space */ + if (errno == 0) + errno = ENOSPC; + ereport(ERROR, + (errcode_for_file_access(), + errmsg("ptrack checkpoint: could not write file \"%s\": %m", ptrack_path_tmp))); + } + + if (pg_fsync(ptrack_tmp_fd) != 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("ptrack checkpoint: could not fsync file \"%s\": %m", ptrack_path_tmp))); + + if (close(ptrack_tmp_fd) != 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("ptrack checkpoint: could not close file \"%s\": %m", ptrack_path_tmp))); + + /* And finally replace old file with the new one */ + durable_rename(ptrack_path_tmp, ptrack_path, ERROR); + + /* Sanity check */ + if (stat(ptrack_path, &stat_buf) == 0 && + stat_buf.st_size != PtrackActualSize) + { + elog(ERROR, "ptrack checkpoint: stat_buf.st_size != ptrack_map_size %zu != " UINT64_FORMAT, + (Size) stat_buf.st_size, PtrackActualSize); + } + elog(DEBUG1, "ptrack checkpoint: completed"); +} + +void +assign_ptrack_map_size(int newval, void *extra) +{ + elog(DEBUG1, "assign_ptrack_map_size: MyProc %d newval %d ptrack_map_size " UINT64_FORMAT, + MyProcPid, newval, ptrack_map_size); + + /* Delete ptrack_map and all related files, if ptrack was switched off. 
*/ + if (newval == 0) + { + ptrack_map_size = 0; + return; + } + + if (newval != 0 && !XLogIsNeeded()) + ereport(ERROR, + (errmsg("assign_ptrack_map_size: cannot use ptrack if wal_level is minimal"), + errdetail("Set wal_level to \"replica\" or higher, or turn off ptrack with \"ptrack.map_size=0\""))); + + if (DataDir != NULL && + !IsBootstrapProcessingMode() && + !InitializingParallelWorker) + { + /* Cast to uint64 in order to avoid int32 overflow */ + ptrack_map_size = (uint64) 1024 * 1024 * newval; + + elog(DEBUG1, "assign_ptrack_map_size: ptrack_map_size set to " UINT64_FORMAT, + ptrack_map_size); + } +} + +/* + * Mark all blocks of the file in ptrack_map. + * For use in functions that copy directories bypassing buffer manager. + */ +static void +ptrack_mark_file(Oid dbOid, Oid tablespaceOid, + const char *filepath, const char *filename) +{ + RelFileNodeBackend rnode; + ForkNumber forknum; + BlockNumber blkno, + nblocks = 0; + struct stat stat_buf; +#if PG_VERSION_NUM >= 170000 + RelFileNumber relNumber; + unsigned segno; +#else + int oidchars; + char oidbuf[OIDCHARS + 1]; +#endif + + /* Do not track temporary relations */ + if (looks_like_temp_rel_name(filename)) + return; + + /* Mark of non-temporary relation */ + rnode.backend = InvalidBackendId; + + nodeDb(nodeOf(rnode)) = dbOid; + nodeSpc(nodeOf(rnode)) = tablespaceOid; + +#if PG_VERSION_NUM >= 170000 + if (!parse_filename_for_nontemp_relation(filename, &relNumber, &forknum, &segno)) + return; + + nodeRel(nodeOf(rnode)) = relNumber; +#else + if (!parse_filename_for_nontemp_relation(filename, &oidchars, &forknum)) + return; + + memcpy(oidbuf, filename, oidchars); + oidbuf[oidchars] = '\0'; + nodeRel(nodeOf(rnode)) = atooid(oidbuf); +#endif + + /* Compute number of blocks based on file size */ + if (stat(filepath, &stat_buf) == 0) + nblocks = stat_buf.st_size / BLCKSZ; + + elog(DEBUG1, "ptrack_mark_file %s, nblocks %u rnode db %u spc %u rel %u, forknum %d", + filepath, nblocks, nodeDb(nodeOf(rnode)), 
nodeSpc(nodeOf(rnode)), nodeRel(nodeOf(rnode)), forknum); + + for (blkno = 0; blkno < nblocks; blkno++) + ptrack_mark_block(rnode, forknum, blkno); +} + +/* + * Mark all files in the given directory in ptrack_map. + * For use in functions that copy directories bypassing buffer manager. + */ +void +ptrack_walkdir(const char *path, Oid tablespaceOid, Oid dbOid) +{ + DIR *dir; + struct dirent *de; + + /* Do not walk during bootstrap and if ptrack is disabled */ + if (ptrack_map_size == 0 + || DataDir == NULL + || IsBootstrapProcessingMode() + || InitializingParallelWorker) + return; + + dir = AllocateDir(path); + + while ((de = ReadDirExtended(dir, path, LOG)) != NULL) + { + char subpath[MAXPGPATH * 2]; + struct stat fst; + int sret; + + CHECK_FOR_INTERRUPTS(); + + if (strcmp(de->d_name, ".") == 0 || + strcmp(de->d_name, "..") == 0) + continue; + + snprintf(subpath, sizeof(subpath), "%s/%s", path, de->d_name); + + sret = lstat(subpath, &fst); + + if (sret < 0) + { + ereport(LOG, + (errcode_for_file_access(), + errmsg("ptrack_walkdir: could not stat file \"%s\": %m", subpath))); + continue; + } + + if (S_ISREG(fst.st_mode)) + ptrack_mark_file(dbOid, tablespaceOid, subpath, de->d_name); + } + + FreeDir(dir); /* we ignore any error here */ +} + +static void +ptrack_atomic_increase(XLogRecPtr new_lsn, pg_atomic_uint64 *var) +{ + /* + * We use pg_atomic_uint64 here only for alignment purposes, because + * pg_atomic_uint64 is forcedly aligned on 8 bytes during the MSVC build. + */ + pg_atomic_uint64 old_lsn; + + old_lsn.value = pg_atomic_read_u64(var); +#if USE_ASSERT_CHECKING + elog(DEBUG3, "ptrack_mark_block: " UINT64_FORMAT " <- " UINT64_FORMAT, old_lsn.value, new_lsn); +#endif + while (old_lsn.value < new_lsn && + !pg_atomic_compare_exchange_u64(var, (uint64 *) &old_lsn.value, new_lsn)); +} + +/* + * Mark modified block in ptrack_map. 
+ */
+void
+ptrack_mark_block(RelFileNodeBackend smgr_rnode,
+				  ForkNumber forknum, BlockNumber blocknum)
+{
+	PtBlockId	bid;
+	uint64		hash;
+	size_t		slots[2];
+	XLogRecPtr	new_lsn;
+	int			i;
+
+	if (ptrack_map_size == 0
+		|| ptrack_map == NULL
+		|| smgr_rnode.backend != InvalidBackendId) /* do not track temporary
+													* relations */
+		return;
+
+	bid.relnode = nodeOf(smgr_rnode);
+	bid.forknum = forknum;
+	bid.blocknum = blocknum;
+
+	hash = BID_HASH_FUNC(bid);
+	slots[0] = (size_t)(hash % PtrackContentNblocks);
+	slots[1] = (size_t)(((hash << 32) | (hash >> 32)) % PtrackContentNblocks);
+
+	new_lsn = ptrack_set_init_lsn();
+
+	/*
+	 * Atomically assign new LSN value to the slots.  Each block maps to two
+	 * slots: one from the hash itself and one from the hash with its 32-bit
+	 * halves swapped.
+	 */
+	for (i = 0; i < lengthof(slots); i++)
+	{
+#if USE_ASSERT_CHECKING
+		elog(DEBUG3, "ptrack_mark_block: map[%zu]", slots[i]);
+#endif
+		ptrack_atomic_increase(new_lsn, &ptrack_map->entries[slots[i]]);
+	}
+}
+
+XLogRecPtr
+ptrack_set_init_lsn(void)
+{
+	XLogRecPtr	new_lsn;
+
+	if (ptrack_map_size == 0 || ptrack_map == NULL)
+		return InvalidXLogRecPtr;
+
+	if (RecoveryInProgress())
+		new_lsn = GetXLogReplayRecPtr(NULL);
+	else
+		new_lsn = GetXLogInsertRecPtr();
+
+	/* Atomically assign new init LSN value, but only if none is recorded yet */
+	if (pg_atomic_read_u64(&ptrack_map->init_lsn) == InvalidXLogRecPtr)
+	{
+#if USE_ASSERT_CHECKING
+		elog(DEBUG3, "ptrack_set_init_lsn: init_lsn");
+#endif
+		ptrack_atomic_increase(new_lsn, &ptrack_map->init_lsn);
+	}
+	return new_lsn;
+}
diff --git a/engine.h b/engine.h
new file mode 100644
index 0000000..7ecddd2
--- /dev/null
+++ b/engine.h
@@ -0,0 +1,115 @@
+/*-------------------------------------------------------------------------
+ *
+ * engine.h
+ *	  header for ptrack map for tracking updates of relation's pages
+ *
+ *
+ * Copyright (c) 2019-2022, Postgres Professional
+ * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * ptrack/engine.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PTRACK_ENGINE_H
+#define PTRACK_ENGINE_H
+
+/* #include "access/xlogdefs.h" */
+/* #include "port/atomics.h" */
+/* #include "storage/block.h" */
+/* #include "storage/buf.h" */
+/* #include "storage/relfilenode.h" */
+/* #include "storage/smgr.h" */
+/* #include "utils/relcache.h" */
+#include "access/hash.h"
+
+/* Persistent copy of ptrack.map to restore after crash */
+#define PTRACK_PATH "global/ptrack.map"
+/* Used for atomic crash-safe update of ptrack.map */
+#define PTRACK_PATH_TMP "global/ptrack.map.tmp"
+
+/*
+ * 8000 64-bit LSNs is 64000 bytes (about 64 KB), which looks like a
+ * reasonable buffer size for disk writes.  On fast NVMe SSD it gives
+ * around 20% increase in ptrack checkpoint speed compared
+ * to PTRACK_BUF_SIZE == 1000, i.e. 8 KB writes.
+ * (PTRACK_BUF_SIZE is a count of pg_atomic_uint64)
+ *
+ * NOTE: but POSIX defines _POSIX_SSIZE_MAX as 32767 (bytes)
+ */
+#define PTRACK_BUF_SIZE ((uint64) 8000)
+
+/* Ptrack magic bytes */
+#define PTRACK_MAGIC "ptk"
+#define PTRACK_MAGIC_SIZE 4
+
+/*
+ * Header of ptrack map.
+ */
+typedef struct PtrackMapHdr
+{
+	/*
+	 * Three magic bytes (+ \0) to be sure, that we are reading ptrack.map
+	 * with a right PtrackMapHdr structure.
+	 */
+	char		magic[PTRACK_MAGIC_SIZE];
+
+	/*
+	 * Map file format version: set to PTRACK_MAP_FILE_VERSION_NUM when the
+	 * map is initialized and compared against it when the file is read back.
+	 */
+	uint32		version_num;
+
+	/* LSN of the moment, when map was last enabled. */
+	pg_atomic_uint64 init_lsn;
+
+	/* Followed by the actual map of LSNs */
+	pg_atomic_uint64 entries[FLEXIBLE_ARRAY_MEMBER];
+
+	/*
+	 * At the end of the map CRC of type pg_crc32c is stored.
+	 */
+} PtrackMapHdr;
+
+typedef PtrackMapHdr * PtrackMap;
+
+/* Number of elements in ptrack map (LSN array) */
+#define PtrackContentNblocks \
+		((ptrack_map_size - offsetof(PtrackMapHdr, entries) - sizeof(pg_crc32c)) / sizeof(pg_atomic_uint64))
+
+/* Actual size of the ptrack map, that we are able to fit into ptrack_map_size */
+#define PtrackActualSize \
+		(offsetof(PtrackMapHdr, entries) + PtrackContentNblocks * sizeof(pg_atomic_uint64) + sizeof(pg_crc32c))
+
+/* CRC32 value offset in order to directly access it in the shared memory chunk */
+#define PtrackCrcOffset (PtrackActualSize - sizeof(pg_crc32c))
+
+/* Hash of block address 'bid'. To get a slot position in the map, the result
+ * should be reduced with '% PtrackContentNblocks'.
+ * NOTE(review): the raw bytes of 'bid' are hashed, so this presumably relies
+ * on the type of 'bid' having no uninitialized padding -- confirm. */
+#define BID_HASH_FUNC(bid) \
+		(DatumGetUInt64(hash_any_extended((unsigned char *)&bid, sizeof(bid), 0)))
+
+/*
+ * Per process pointer to shared ptrack_map
+ */
+extern PtrackMap ptrack_map;
+
+/*
+ * Size of ptrack map in bytes
+ * TODO: to be protected by PtrackResizeLock?
+ */ +extern uint64 ptrack_map_size; +extern int ptrack_map_size_tmp; + +extern void ptrackCheckpoint(void); +extern void ptrackMapInit(void); +extern void ptrackCleanFiles(void); +extern XLogRecPtr ptrack_set_init_lsn(void); + +extern void assign_ptrack_map_size(int newval, void *extra); + +extern void ptrack_walkdir(const char *path, Oid tablespaceOid, Oid dbOid); +extern void ptrack_mark_block(RelFileNodeBackend smgr_rnode, + ForkNumber forkno, BlockNumber blkno); + +#endif /* PTRACK_ENGINE_H */ diff --git a/patches/REL_11_STABLE-ptrack-core.diff b/patches/REL_11_STABLE-ptrack-core.diff new file mode 100644 index 0000000..e78977c --- /dev/null +++ b/patches/REL_11_STABLE-ptrack-core.diff @@ -0,0 +1,258 @@ +diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c +index 3e53b3df6fb..f76bfc2a646 100644 +--- a/src/backend/replication/basebackup.c ++++ b/src/backend/replication/basebackup.c +@@ -209,6 +209,13 @@ static const struct exclude_list_item excludeFiles[] = + {"postmaster.pid", false}, + {"postmaster.opts", false}, + ++ /* ++ * Skip all transient ptrack files, but do copy ptrack.map, since it may ++ * be successfully used immediately after backup. TODO: check, test? 
++ */ ++ {"ptrack.map.mmap", false}, ++ {"ptrack.map.tmp", false}, ++ + /* end of list */ + {NULL, false} + }; +@@ -224,6 +231,10 @@ static const struct exclude_list_item noChecksumFiles[] = { + {"pg_filenode.map", false}, + {"pg_internal.init", true}, + {"PG_VERSION", false}, ++ {"ptrack.map.mmap", false}, ++ {"ptrack.map", false}, ++ {"ptrack.map.tmp", false}, ++ + #ifdef EXEC_BACKEND + {"config_exec_params", true}, + #endif +diff --git a/src/backend/storage/file/copydir.c b/src/backend/storage/file/copydir.c +index 4a0d23b11e3..d59009a4c8c 100644 +--- a/src/backend/storage/file/copydir.c ++++ b/src/backend/storage/file/copydir.c +@@ -27,6 +27,8 @@ + #include "miscadmin.h" + #include "pgstat.h" + ++copydir_hook_type copydir_hook = NULL; ++ + /* + * copydir: copy a directory + * +@@ -78,6 +80,9 @@ copydir(char *fromdir, char *todir, bool recurse) + } + FreeDir(xldir); + ++ if (copydir_hook) ++ copydir_hook(todir); ++ + /* + * Be paranoid here and fsync all files to ensure the copy is really done. + * But if fsync is disabled, we're done. 
+diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c +index 200cc7f657a..d0dcb5c0287 100644 +--- a/src/backend/storage/smgr/md.c ++++ b/src/backend/storage/smgr/md.c +@@ -39,6 +39,7 @@ + #include "utils/memutils.h" + #include "pg_trace.h" + ++ProcessSyncRequests_hook_type ProcessSyncRequests_hook = NULL; + + /* intervals for calling AbsorbFsyncRequests in mdsync and mdpostckpt */ + #define FSYNCS_PER_ABSORB 10 +@@ -114,6 +115,8 @@ typedef struct _MdfdVec + + static MemoryContext MdCxt; /* context for all MdfdVec objects */ + ++mdextend_hook_type mdextend_hook = NULL; ++mdwrite_hook_type mdwrite_hook = NULL; + + /* + * In some contexts (currently, standalone backends and the checkpointer) +@@ -558,6 +561,9 @@ mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + register_dirty_segment(reln, forknum, v); + + Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE)); ++ ++ if (mdextend_hook) ++ mdextend_hook(reln->smgr_rnode, forknum, blocknum); + } + + /* +@@ -851,6 +857,9 @@ mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + + if (!skipFsync && !SmgrIsTemp(reln)) + register_dirty_segment(reln, forknum, v); ++ ++ if (mdwrite_hook) ++ mdwrite_hook(reln->smgr_rnode, forknum, blocknum); + } + + /* +@@ -1329,6 +1338,9 @@ mdsync(void) + CheckpointStats.ckpt_longest_sync = longest; + CheckpointStats.ckpt_agg_sync_time = total_elapsed; + ++ if (ProcessSyncRequests_hook) ++ ProcessSyncRequests_hook(); ++ + /* Flag successful completion of mdsync */ + mdsync_in_progress = false; + } +diff --git a/src/bin/pg_resetwal/pg_resetwal.c b/src/bin/pg_resetwal/pg_resetwal.c +index 6fb403a5a8a..6e31ccb3e0f 100644 +--- a/src/bin/pg_resetwal/pg_resetwal.c ++++ b/src/bin/pg_resetwal/pg_resetwal.c +@@ -84,6 +84,7 @@ static void RewriteControlFile(void); + static void FindEndOfXLOG(void); + static void KillExistingXLOG(void); + static void KillExistingArchiveStatus(void); ++static void KillExistingPtrack(void); + static 
void WriteEmptyXLOG(void); + static void usage(void); + +@@ -516,6 +517,7 @@ main(int argc, char *argv[]) + RewriteControlFile(); + KillExistingXLOG(); + KillExistingArchiveStatus(); ++ KillExistingPtrack(); + WriteEmptyXLOG(); + + printf(_("Write-ahead log reset\n")); +@@ -1201,6 +1203,57 @@ KillExistingArchiveStatus(void) + } + } + ++/* ++ * Remove existing ptrack files ++ */ ++static void ++KillExistingPtrack(void) ++{ ++#define PTRACKDIR "global" ++ ++ DIR *xldir; ++ struct dirent *xlde; ++ char path[MAXPGPATH + sizeof(PTRACKDIR)]; ++ ++ xldir = opendir(PTRACKDIR); ++ if (xldir == NULL) ++ { ++ fprintf(stderr, _("%s: could not open directory \"%s\": %s\n"), ++ progname, PTRACKDIR, strerror(errno)); ++ exit(1); ++ } ++ ++ while (errno = 0, (xlde = readdir(xldir)) != NULL) ++ { ++ if (strcmp(xlde->d_name, "ptrack.map.mmap") == 0 || ++ strcmp(xlde->d_name, "ptrack.map") == 0 || ++ strcmp(xlde->d_name, "ptrack.map.tmp") == 0) ++ { ++ snprintf(path, sizeof(path), "%s/%s", PTRACKDIR, xlde->d_name); ++ if (unlink(path) < 0) ++ { ++ fprintf(stderr, _("%s: could not delete file \"%s\": %s\n"), ++ progname, path, strerror(errno)); ++ exit(1); ++ } ++ } ++ } ++ ++ if (errno) ++ { ++ fprintf(stderr, _("%s: could not read directory \"%s\": %s\n"), ++ progname, PTRACKDIR, strerror(errno)); ++ exit(1); ++ } ++ ++ if (closedir(xldir)) ++ { ++ fprintf(stderr, _("%s: could not close directory \"%s\": %s\n"), ++ progname, PTRACKDIR, strerror(errno)); ++ exit(1); ++ } ++} ++ + + /* + * Write an empty XLOG file, containing only the checkpoint record +diff --git a/src/bin/pg_rewind/filemap.c b/src/bin/pg_rewind/filemap.c +index 197163d5544..fc846e78175 100644 +--- a/src/bin/pg_rewind/filemap.c ++++ b/src/bin/pg_rewind/filemap.c +@@ -118,6 +118,10 @@ static const struct exclude_list_item excludeFiles[] = + {"postmaster.pid", false}, + {"postmaster.opts", false}, + ++ {"ptrack.map.mmap", false}, ++ {"ptrack.map", false}, ++ {"ptrack.map.tmp", false}, ++ + /* end of list */ + {NULL, 
false} + }; +diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h +index 80241455357..50dca7bf6f4 100644 +--- a/src/include/miscadmin.h ++++ b/src/include/miscadmin.h +@@ -367,7 +367,7 @@ typedef enum ProcessingMode + NormalProcessing /* normal processing */ + } ProcessingMode; + +-extern ProcessingMode Mode; ++extern PGDLLIMPORT ProcessingMode Mode; + + #define IsBootstrapProcessingMode() (Mode == BootstrapProcessing) + #define IsInitProcessingMode() (Mode == InitProcessing) +diff --git a/src/include/storage/copydir.h b/src/include/storage/copydir.h +index 4fef3e21072..e55430879c3 100644 +--- a/src/include/storage/copydir.h ++++ b/src/include/storage/copydir.h +@@ -13,6 +13,9 @@ + #ifndef COPYDIR_H + #define COPYDIR_H + ++typedef void (*copydir_hook_type) (const char *path); ++extern PGDLLIMPORT copydir_hook_type copydir_hook; ++ + extern void copydir(char *fromdir, char *todir, bool recurse); + extern void copy_file(char *fromfile, char *tofile); + +diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h +index 0298ed1a2bc..24c684771d0 100644 +--- a/src/include/storage/smgr.h ++++ b/src/include/storage/smgr.h +@@ -116,6 +116,17 @@ extern void AtEOXact_SMgr(void); + /* internals: move me elsewhere -- ay 7/94 */ + + /* in md.c */ ++ ++typedef void (*mdextend_hook_type) (RelFileNodeBackend smgr_rnode, ++ ForkNumber forknum, BlockNumber blocknum); ++extern PGDLLIMPORT mdextend_hook_type mdextend_hook; ++typedef void (*mdwrite_hook_type) (RelFileNodeBackend smgr_rnode, ++ ForkNumber forknum, BlockNumber blocknum); ++extern PGDLLIMPORT mdwrite_hook_type mdwrite_hook; ++ ++typedef void (*ProcessSyncRequests_hook_type) (void); ++extern PGDLLIMPORT ProcessSyncRequests_hook_type ProcessSyncRequests_hook; ++ + extern void mdinit(void); + extern void mdclose(SMgrRelation reln, ForkNumber forknum); + extern void mdcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo); +diff --git a/src/tools/msvc/Mkvcbuild.pm b/src/tools/msvc/Mkvcbuild.pm +index 
b52baa90988..74870c048db 100644 +--- a/src/tools/msvc/Mkvcbuild.pm ++++ b/src/tools/msvc/Mkvcbuild.pm +@@ -33,7 +33,7 @@ my @unlink_on_exit; + # Set of variables for modules in contrib/ and src/test/modules/ + my $contrib_defines = { 'refint' => 'REFINT_VERBOSE' }; + my @contrib_uselibpq = ('dblink', 'oid2name', 'postgres_fdw', 'vacuumlo'); +-my @contrib_uselibpgport = ('oid2name', 'pg_standby', 'vacuumlo'); ++my @contrib_uselibpgport = ('oid2name', 'pg_standby', 'vacuumlo', 'ptrack'); + my @contrib_uselibpgcommon = ('oid2name', 'pg_standby', 'vacuumlo'); + my $contrib_extralibs = undef; + my $contrib_extraincludes = { 'dblink' => ['src/backend'] }; diff --git a/patches/REL_12_STABLE-ptrack-core.diff b/patches/REL_12_STABLE-ptrack-core.diff new file mode 100644 index 0000000..e3feb67 --- /dev/null +++ b/patches/REL_12_STABLE-ptrack-core.diff @@ -0,0 +1,286 @@ +diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c +index 3bc26568eb7..aa282bfe0ab 100644 +--- a/src/backend/replication/basebackup.c ++++ b/src/backend/replication/basebackup.c +@@ -210,6 +210,13 @@ static const struct exclude_list_item excludeFiles[] = + {"postmaster.pid", false}, + {"postmaster.opts", false}, + ++ /* ++ * Skip all transient ptrack files, but do copy ptrack.map, since it may ++ * be successfully used immediately after backup. TODO: check, test? 
++ */ ++ {"ptrack.map.mmap", false}, ++ {"ptrack.map.tmp", false}, ++ + /* end of list */ + {NULL, false} + }; +@@ -225,6 +232,11 @@ static const struct exclude_list_item noChecksumFiles[] = { + {"pg_filenode.map", false}, + {"pg_internal.init", true}, + {"PG_VERSION", false}, ++ ++ {"ptrack.map.mmap", false}, ++ {"ptrack.map", false}, ++ {"ptrack.map.tmp", false}, ++ + #ifdef EXEC_BACKEND + {"config_exec_params", true}, + #endif +diff --git a/src/backend/storage/file/copydir.c b/src/backend/storage/file/copydir.c +index 30f6200a86f..53e3b22c3e4 100644 +--- a/src/backend/storage/file/copydir.c ++++ b/src/backend/storage/file/copydir.c +@@ -27,6 +27,8 @@ + #include "miscadmin.h" + #include "pgstat.h" + ++copydir_hook_type copydir_hook = NULL; ++ + /* + * copydir: copy a directory + * +@@ -78,6 +80,9 @@ copydir(char *fromdir, char *todir, bool recurse) + } + FreeDir(xldir); + ++ if (copydir_hook) ++ copydir_hook(todir); ++ + /* + * Be paranoid here and fsync all files to ensure the copy is really done. + * But if fsync is disabled, we're done. +diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c +index 050cee5f9a9..75cf67d464f 100644 +--- a/src/backend/storage/smgr/md.c ++++ b/src/backend/storage/smgr/md.c +@@ -86,6 +86,8 @@ typedef struct _MdfdVec + + static MemoryContext MdCxt; /* context for all MdfdVec objects */ + ++mdextend_hook_type mdextend_hook = NULL; ++mdwrite_hook_type mdwrite_hook = NULL; + + /* Populate a file tag describing an md.c segment file. 
*/ + #define INIT_MD_FILETAG(a,xx_rnode,xx_forknum,xx_segno) \ +@@ -422,6 +424,9 @@ mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + register_dirty_segment(reln, forknum, v); + + Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE)); ++ ++ if (mdextend_hook) ++ mdextend_hook(reln->smgr_rnode, forknum, blocknum); + } + + /* +@@ -692,6 +697,9 @@ mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + + if (!skipFsync && !SmgrIsTemp(reln)) + register_dirty_segment(reln, forknum, v); ++ ++ if (mdwrite_hook) ++ mdwrite_hook(reln->smgr_rnode, forknum, blocknum); + } + + /* +diff --git a/src/backend/storage/sync/sync.c b/src/backend/storage/sync/sync.c +index aff3e885f36..4fffa5df17c 100644 +--- a/src/backend/storage/sync/sync.c ++++ b/src/backend/storage/sync/sync.c +@@ -75,6 +75,8 @@ static MemoryContext pendingOpsCxt; /* context for the above */ + static CycleCtr sync_cycle_ctr = 0; + static CycleCtr checkpoint_cycle_ctr = 0; + ++ProcessSyncRequests_hook_type ProcessSyncRequests_hook = NULL; ++ + /* Intervals for calling AbsorbSyncRequests */ + #define FSYNCS_PER_ABSORB 10 + #define UNLINKS_PER_ABSORB 10 +@@ -420,6 +422,9 @@ ProcessSyncRequests(void) + CheckpointStats.ckpt_longest_sync = longest; + CheckpointStats.ckpt_agg_sync_time = total_elapsed; + ++ if (ProcessSyncRequests_hook) ++ ProcessSyncRequests_hook(); ++ + /* Flag successful completion of ProcessSyncRequests */ + sync_in_progress = false; + } +diff --git a/src/bin/pg_checksums/pg_checksums.c b/src/bin/pg_checksums/pg_checksums.c +index 03c3da3d730..fdfe5c1318e 100644 +--- a/src/bin/pg_checksums/pg_checksums.c ++++ b/src/bin/pg_checksums/pg_checksums.c +@@ -113,6 +113,11 @@ static const struct exclude_list_item skip[] = { + {"pg_filenode.map", false}, + {"pg_internal.init", true}, + {"PG_VERSION", false}, ++ ++ {"ptrack.map.mmap", false}, ++ {"ptrack.map", false}, ++ {"ptrack.map.tmp", false}, ++ + #ifdef EXEC_BACKEND + {"config_exec_params", true}, + 
#endif +diff --git a/src/bin/pg_resetwal/pg_resetwal.c b/src/bin/pg_resetwal/pg_resetwal.c +index 349347593cf..99d1e0a4fc0 100644 +--- a/src/bin/pg_resetwal/pg_resetwal.c ++++ b/src/bin/pg_resetwal/pg_resetwal.c +@@ -84,6 +84,7 @@ static void RewriteControlFile(void); + static void FindEndOfXLOG(void); + static void KillExistingXLOG(void); + static void KillExistingArchiveStatus(void); ++static void KillExistingPtrack(void); + static void WriteEmptyXLOG(void); + static void usage(void); + +@@ -513,6 +514,7 @@ main(int argc, char *argv[]) + RewriteControlFile(); + KillExistingXLOG(); + KillExistingArchiveStatus(); ++ KillExistingPtrack(); + WriteEmptyXLOG(); + + printf(_("Write-ahead log reset\n")); +@@ -1121,6 +1123,53 @@ KillExistingArchiveStatus(void) + } + } + ++/* ++ * Remove existing ptrack files ++ */ ++static void ++KillExistingPtrack(void) ++{ ++#define PTRACKDIR "global" ++ ++ DIR *xldir; ++ struct dirent *xlde; ++ char path[MAXPGPATH + sizeof(PTRACKDIR)]; ++ ++ xldir = opendir(PTRACKDIR); ++ if (xldir == NULL) ++ { ++ pg_log_error("could not open directory \"%s\": %m", PTRACKDIR); ++ exit(1); ++ } ++ ++ while (errno = 0, (xlde = readdir(xldir)) != NULL) ++ { ++ if (strcmp(xlde->d_name, "ptrack.map.mmap") == 0 || ++ strcmp(xlde->d_name, "ptrack.map") == 0 || ++ strcmp(xlde->d_name, "ptrack.map.tmp") == 0) ++ { ++ snprintf(path, sizeof(path), "%s/%s", PTRACKDIR, xlde->d_name); ++ if (unlink(path) < 0) ++ { ++ pg_log_error("could not delete file \"%s\": %m", path); ++ exit(1); ++ } ++ } ++ } ++ ++ if (errno) ++ { ++ pg_log_error("could not read directory \"%s\": %m", PTRACKDIR); ++ exit(1); ++ } ++ ++ if (closedir(xldir)) ++ { ++ pg_log_error("could not close directory \"%s\": %m", PTRACKDIR); ++ exit(1); ++ } ++} ++ + + /* + * Write an empty XLOG file, containing only the checkpoint record +diff --git a/src/bin/pg_rewind/filemap.c b/src/bin/pg_rewind/filemap.c +index 56f83d2fb2f..60bb7bf7a3b 100644 +--- a/src/bin/pg_rewind/filemap.c ++++ 
b/src/bin/pg_rewind/filemap.c +@@ -117,6 +117,10 @@ static const struct exclude_list_item excludeFiles[] = + {"postmaster.pid", false}, + {"postmaster.opts", false}, + ++ {"ptrack.map.mmap", false}, ++ {"ptrack.map", false}, ++ {"ptrack.map.tmp", false}, ++ + /* end of list */ + {NULL, false} + }; +diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h +index 61a24c2e3c6..cbd46d0cb02 100644 +--- a/src/include/miscadmin.h ++++ b/src/include/miscadmin.h +@@ -369,7 +369,7 @@ typedef enum ProcessingMode + NormalProcessing /* normal processing */ + } ProcessingMode; + +-extern ProcessingMode Mode; ++extern PGDLLIMPORT ProcessingMode Mode; + + #define IsBootstrapProcessingMode() (Mode == BootstrapProcessing) + #define IsInitProcessingMode() (Mode == InitProcessing) +diff --git a/src/include/storage/copydir.h b/src/include/storage/copydir.h +index 525cc6203e1..9481e1c5a88 100644 +--- a/src/include/storage/copydir.h ++++ b/src/include/storage/copydir.h +@@ -13,6 +13,9 @@ + #ifndef COPYDIR_H + #define COPYDIR_H + ++typedef void (*copydir_hook_type) (const char *path); ++extern PGDLLIMPORT copydir_hook_type copydir_hook; ++ + extern void copydir(char *fromdir, char *todir, bool recurse); + extern void copy_file(char *fromfile, char *tofile); + +diff --git a/src/include/storage/md.h b/src/include/storage/md.h +index df24b931613..b32c1e9500f 100644 +--- a/src/include/storage/md.h ++++ b/src/include/storage/md.h +@@ -19,6 +19,13 @@ + #include "storage/smgr.h" + #include "storage/sync.h" + ++typedef void (*mdextend_hook_type) (RelFileNodeBackend smgr_rnode, ++ ForkNumber forknum, BlockNumber blocknum); ++extern PGDLLIMPORT mdextend_hook_type mdextend_hook; ++typedef void (*mdwrite_hook_type) (RelFileNodeBackend smgr_rnode, ++ ForkNumber forknum, BlockNumber blocknum); ++extern PGDLLIMPORT mdwrite_hook_type mdwrite_hook; ++ + /* md storage manager functionality */ + extern void mdinit(void); + extern void mdclose(SMgrRelation reln, ForkNumber forknum); +diff --git 
a/src/include/storage/sync.h b/src/include/storage/sync.h +index 16428c5f5fb..6b0cd8f8eea 100644 +--- a/src/include/storage/sync.h ++++ b/src/include/storage/sync.h +@@ -50,6 +50,9 @@ typedef struct FileTag + uint32 segno; + } FileTag; + ++typedef void (*ProcessSyncRequests_hook_type) (void); ++extern PGDLLIMPORT ProcessSyncRequests_hook_type ProcessSyncRequests_hook; ++ + extern void InitSync(void); + extern void SyncPreCheckpoint(void); + extern void SyncPostCheckpoint(void); +diff --git a/src/tools/msvc/Mkvcbuild.pm b/src/tools/msvc/Mkvcbuild.pm +index 1bdc33d7168..83b1190775f 100644 +--- a/src/tools/msvc/Mkvcbuild.pm ++++ b/src/tools/msvc/Mkvcbuild.pm +@@ -33,7 +33,7 @@ my @unlink_on_exit; + # Set of variables for modules in contrib/ and src/test/modules/ + my $contrib_defines = { 'refint' => 'REFINT_VERBOSE' }; + my @contrib_uselibpq = ('dblink', 'oid2name', 'postgres_fdw', 'vacuumlo'); +-my @contrib_uselibpgport = ('oid2name', 'pg_standby', 'vacuumlo'); ++my @contrib_uselibpgport = ('oid2name', 'pg_standby', 'vacuumlo', 'ptrack'); + my @contrib_uselibpgcommon = ('oid2name', 'pg_standby', 'vacuumlo'); + my $contrib_extralibs = undef; + my $contrib_extraincludes = { 'dblink' => ['src/backend'] }; diff --git a/patches/REL_13_STABLE-ptrack-core.diff b/patches/REL_13_STABLE-ptrack-core.diff new file mode 100644 index 0000000..5b73162 --- /dev/null +++ b/patches/REL_13_STABLE-ptrack-core.diff @@ -0,0 +1,286 @@ +diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c +index 50ae1f16d0..721b926ad2 100644 +--- a/src/backend/replication/basebackup.c ++++ b/src/backend/replication/basebackup.c +@@ -233,6 +233,13 @@ static const struct exclude_list_item excludeFiles[] = + {"postmaster.pid", false}, + {"postmaster.opts", false}, + ++ /* ++ * Skip all transient ptrack files, but do copy ptrack.map, since it may ++ * be successfully used immediately after backup. TODO: check, test? 
++ */ ++ {"ptrack.map.mmap", false}, ++ {"ptrack.map.tmp", false}, ++ + /* end of list */ + {NULL, false} + }; +@@ -248,6 +255,11 @@ static const struct exclude_list_item noChecksumFiles[] = { + {"pg_filenode.map", false}, + {"pg_internal.init", true}, + {"PG_VERSION", false}, ++ ++ {"ptrack.map.mmap", false}, ++ {"ptrack.map", false}, ++ {"ptrack.map.tmp", false}, ++ + #ifdef EXEC_BACKEND + {"config_exec_params", true}, + #endif +diff --git a/src/backend/storage/file/copydir.c b/src/backend/storage/file/copydir.c +index 0cf598dd0c..c9c44a4ae7 100644 +--- a/src/backend/storage/file/copydir.c ++++ b/src/backend/storage/file/copydir.c +@@ -27,6 +27,8 @@ + #include "storage/copydir.h" + #include "storage/fd.h" + ++copydir_hook_type copydir_hook = NULL; ++ + /* + * copydir: copy a directory + * +@@ -78,6 +80,9 @@ copydir(char *fromdir, char *todir, bool recurse) + } + FreeDir(xldir); + ++ if (copydir_hook) ++ copydir_hook(todir); ++ + /* + * Be paranoid here and fsync all files to ensure the copy is really done. + * But if fsync is disabled, we're done. +diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c +index 0eacd461cd..c2ef404a1a 100644 +--- a/src/backend/storage/smgr/md.c ++++ b/src/backend/storage/smgr/md.c +@@ -87,6 +87,8 @@ typedef struct _MdfdVec + + static MemoryContext MdCxt; /* context for all MdfdVec objects */ + ++mdextend_hook_type mdextend_hook = NULL; ++mdwrite_hook_type mdwrite_hook = NULL; + + /* Populate a file tag describing an md.c segment file. 
*/ + #define INIT_MD_FILETAG(a,xx_rnode,xx_forknum,xx_segno) \ +@@ -435,6 +437,9 @@ mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + register_dirty_segment(reln, forknum, v); + + Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE)); ++ ++ if (mdextend_hook) ++ mdextend_hook(reln->smgr_rnode, forknum, blocknum); + } + + /* +@@ -721,6 +726,9 @@ mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + + if (!skipFsync && !SmgrIsTemp(reln)) + register_dirty_segment(reln, forknum, v); ++ ++ if (mdwrite_hook) ++ mdwrite_hook(reln->smgr_rnode, forknum, blocknum); + } + + /* +diff --git a/src/backend/storage/sync/sync.c b/src/backend/storage/sync/sync.c +index 3ded2cdd71..3a596a59f7 100644 +--- a/src/backend/storage/sync/sync.c ++++ b/src/backend/storage/sync/sync.c +@@ -75,6 +75,8 @@ static MemoryContext pendingOpsCxt; /* context for the above */ + static CycleCtr sync_cycle_ctr = 0; + static CycleCtr checkpoint_cycle_ctr = 0; + ++ProcessSyncRequests_hook_type ProcessSyncRequests_hook = NULL; ++ + /* Intervals for calling AbsorbSyncRequests */ + #define FSYNCS_PER_ABSORB 10 + #define UNLINKS_PER_ABSORB 10 +@@ -420,6 +422,9 @@ ProcessSyncRequests(void) + CheckpointStats.ckpt_longest_sync = longest; + CheckpointStats.ckpt_agg_sync_time = total_elapsed; + ++ if (ProcessSyncRequests_hook) ++ ProcessSyncRequests_hook(); ++ + /* Flag successful completion of ProcessSyncRequests */ + sync_in_progress = false; + } +diff --git a/src/bin/pg_checksums/pg_checksums.c b/src/bin/pg_checksums/pg_checksums.c +index ffdc23945c..7ae95866ce 100644 +--- a/src/bin/pg_checksums/pg_checksums.c ++++ b/src/bin/pg_checksums/pg_checksums.c +@@ -114,6 +114,11 @@ static const struct exclude_list_item skip[] = { + {"pg_filenode.map", false}, + {"pg_internal.init", true}, + {"PG_VERSION", false}, ++ ++ {"ptrack.map.mmap", false}, ++ {"ptrack.map", false}, ++ {"ptrack.map.tmp", false}, ++ + #ifdef EXEC_BACKEND + {"config_exec_params", true}, + #endif 
+diff --git a/src/bin/pg_resetwal/pg_resetwal.c b/src/bin/pg_resetwal/pg_resetwal.c +index 233441837f..cf7bd073bf 100644 +--- a/src/bin/pg_resetwal/pg_resetwal.c ++++ b/src/bin/pg_resetwal/pg_resetwal.c +@@ -84,6 +84,7 @@ static void RewriteControlFile(void); + static void FindEndOfXLOG(void); + static void KillExistingXLOG(void); + static void KillExistingArchiveStatus(void); ++static void KillExistingPtrack(void); + static void WriteEmptyXLOG(void); + static void usage(void); + +@@ -513,6 +514,7 @@ main(int argc, char *argv[]) + RewriteControlFile(); + KillExistingXLOG(); + KillExistingArchiveStatus(); ++ KillExistingPtrack(); + WriteEmptyXLOG(); + + printf(_("Write-ahead log reset\n")); +@@ -1102,6 +1104,53 @@ KillExistingArchiveStatus(void) + } + } + ++/* ++ * Remove existing ptrack files ++ */ ++static void ++KillExistingPtrack(void) ++{ ++#define PTRACKDIR "global" ++ ++ DIR *xldir; ++ struct dirent *xlde; ++ char path[MAXPGPATH + sizeof(PTRACKDIR)]; ++ ++ xldir = opendir(PTRACKDIR); ++ if (xldir == NULL) ++ { ++ pg_log_error("could not open directory \"%s\": %m", PTRACKDIR); ++ exit(1); ++ } ++ ++ while (errno = 0, (xlde = readdir(xldir)) != NULL) ++ { ++ if (strcmp(xlde->d_name, "ptrack.map.mmap") == 0 || ++ strcmp(xlde->d_name, "ptrack.map") == 0 || ++ strcmp(xlde->d_name, "ptrack.map.tmp") == 0) ++ { ++ snprintf(path, sizeof(path), "%s/%s", PTRACKDIR, xlde->d_name); ++ if (unlink(path) < 0) ++ { ++ pg_log_error("could not delete file \"%s\": %m", path); ++ exit(1); ++ } ++ } ++ } ++ ++ if (errno) ++ { ++ pg_log_error("could not read directory \"%s\": %m", PTRACKDIR); ++ exit(1); ++ } ++ ++ if (closedir(xldir)) ++ { ++ pg_log_error("could not close directory \"%s\": %m", PTRACKDIR); ++ exit(1); ++ } ++} ++ + + /* + * Write an empty XLOG file, containing only the checkpoint record +diff --git a/src/bin/pg_rewind/filemap.c b/src/bin/pg_rewind/filemap.c +index fbb97b5cf1..6cd7f2ae3e 100644 +--- a/src/bin/pg_rewind/filemap.c ++++ b/src/bin/pg_rewind/filemap.c 
+@@ -124,6 +124,10 @@ static const struct exclude_list_item excludeFiles[] = + {"postmaster.pid", false}, + {"postmaster.opts", false}, + ++ {"ptrack.map.mmap", false}, ++ {"ptrack.map", false}, ++ {"ptrack.map.tmp", false}, ++ + /* end of list */ + {NULL, false} + }; +diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h +index 72e3352398..5c2e016501 100644 +--- a/src/include/miscadmin.h ++++ b/src/include/miscadmin.h +@@ -388,7 +388,7 @@ typedef enum ProcessingMode + NormalProcessing /* normal processing */ + } ProcessingMode; + +-extern ProcessingMode Mode; ++extern PGDLLIMPORT ProcessingMode Mode; + + #define IsBootstrapProcessingMode() (Mode == BootstrapProcessing) + #define IsInitProcessingMode() (Mode == InitProcessing) +diff --git a/src/include/storage/copydir.h b/src/include/storage/copydir.h +index 5d28f59c1d..0d3f04d8af 100644 +--- a/src/include/storage/copydir.h ++++ b/src/include/storage/copydir.h +@@ -13,6 +13,9 @@ + #ifndef COPYDIR_H + #define COPYDIR_H + ++typedef void (*copydir_hook_type) (const char *path); ++extern PGDLLIMPORT copydir_hook_type copydir_hook; ++ + extern void copydir(char *fromdir, char *todir, bool recurse); + extern void copy_file(char *fromfile, char *tofile); + +diff --git a/src/include/storage/md.h b/src/include/storage/md.h +index 07fd1bb7d0..5294811bc8 100644 +--- a/src/include/storage/md.h ++++ b/src/include/storage/md.h +@@ -19,6 +19,13 @@ + #include "storage/smgr.h" + #include "storage/sync.h" + ++typedef void (*mdextend_hook_type) (RelFileNodeBackend smgr_rnode, ++ ForkNumber forknum, BlockNumber blocknum); ++extern PGDLLIMPORT mdextend_hook_type mdextend_hook; ++typedef void (*mdwrite_hook_type) (RelFileNodeBackend smgr_rnode, ++ ForkNumber forknum, BlockNumber blocknum); ++extern PGDLLIMPORT mdwrite_hook_type mdwrite_hook; ++ + /* md storage manager functionality */ + extern void mdinit(void); + extern void mdopen(SMgrRelation reln); +diff --git a/src/include/storage/sync.h b/src/include/storage/sync.h +index 
e16ab8e711..88da9686eb 100644 +--- a/src/include/storage/sync.h ++++ b/src/include/storage/sync.h +@@ -50,6 +50,9 @@ typedef struct FileTag + uint32 segno; + } FileTag; + ++typedef void (*ProcessSyncRequests_hook_type) (void); ++extern PGDLLIMPORT ProcessSyncRequests_hook_type ProcessSyncRequests_hook; ++ + extern void InitSync(void); + extern void SyncPreCheckpoint(void); + extern void SyncPostCheckpoint(void); +diff --git a/src/tools/msvc/Mkvcbuild.pm b/src/tools/msvc/Mkvcbuild.pm +index 67b2ea9ee9b..e9a282d5647 100644 +--- a/src/tools/msvc/Mkvcbuild.pm ++++ b/src/tools/msvc/Mkvcbuild.pm +@@ -34,7 +34,7 @@ my @unlink_on_exit; + # Set of variables for modules in contrib/ and src/test/modules/ + my $contrib_defines = { 'refint' => 'REFINT_VERBOSE' }; + my @contrib_uselibpq = ('dblink', 'oid2name', 'postgres_fdw', 'vacuumlo'); +-my @contrib_uselibpgport = ('oid2name', 'pg_standby', 'vacuumlo'); ++my @contrib_uselibpgport = ('oid2name', 'pg_standby', 'vacuumlo', 'ptrack'); + my @contrib_uselibpgcommon = ('oid2name', 'pg_standby', 'vacuumlo'); + my $contrib_extralibs = undef; + my $contrib_extraincludes = { 'dblink' => ['src/backend'] }; diff --git a/patches/REL_14_STABLE-ptrack-core.diff b/patches/REL_14_STABLE-ptrack-core.diff new file mode 100644 index 0000000..88ffcdc --- /dev/null +++ b/patches/REL_14_STABLE-ptrack-core.diff @@ -0,0 +1,286 @@ +diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c +index 50ae1f16d0..721b926ad2 100644 +--- a/src/backend/replication/basebackup.c ++++ b/src/backend/replication/basebackup.c +@@ -233,6 +233,13 @@ static const struct exclude_list_item excludeFiles[] = + {"postmaster.pid", false}, + {"postmaster.opts", false}, + ++ /* ++ * Skip all transient ptrack files, but do copy ptrack.map, since it may ++ * be successfully used immediately after backup. TODO: check, test? 
++ */ ++ {"ptrack.map.mmap", false}, ++ {"ptrack.map.tmp", false}, ++ + /* end of list */ + {NULL, false} + }; +@@ -248,6 +255,11 @@ static const struct exclude_list_item noChecksumFiles[] = { + {"pg_filenode.map", false}, + {"pg_internal.init", true}, + {"PG_VERSION", false}, ++ ++ {"ptrack.map.mmap", false}, ++ {"ptrack.map", false}, ++ {"ptrack.map.tmp", false}, ++ + #ifdef EXEC_BACKEND + {"config_exec_params", true}, + #endif +diff --git a/src/backend/storage/file/copydir.c b/src/backend/storage/file/copydir.c +index 0cf598dd0c..c9c44a4ae7 100644 +--- a/src/backend/storage/file/copydir.c ++++ b/src/backend/storage/file/copydir.c +@@ -27,6 +27,8 @@ + #include "storage/copydir.h" + #include "storage/fd.h" + ++copydir_hook_type copydir_hook = NULL; ++ + /* + * copydir: copy a directory + * +@@ -78,6 +80,9 @@ copydir(char *fromdir, char *todir, bool recurse) + } + FreeDir(xldir); + ++ if (copydir_hook) ++ copydir_hook(todir); ++ + /* + * Be paranoid here and fsync all files to ensure the copy is really done. + * But if fsync is disabled, we're done. +diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c +index 0eacd461cd..c2ef404a1a 100644 +--- a/src/backend/storage/smgr/md.c ++++ b/src/backend/storage/smgr/md.c +@@ -87,6 +87,8 @@ typedef struct _MdfdVec + + static MemoryContext MdCxt; /* context for all MdfdVec objects */ + ++mdextend_hook_type mdextend_hook = NULL; ++mdwrite_hook_type mdwrite_hook = NULL; + + /* Populate a file tag describing an md.c segment file. 
*/ + #define INIT_MD_FILETAG(a,xx_rnode,xx_forknum,xx_segno) \ +@@ -435,6 +437,9 @@ mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + register_dirty_segment(reln, forknum, v); + + Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE)); ++ ++ if (mdextend_hook) ++ mdextend_hook(reln->smgr_rnode, forknum, blocknum); + } + + /* +@@ -721,6 +726,9 @@ mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + + if (!skipFsync && !SmgrIsTemp(reln)) + register_dirty_segment(reln, forknum, v); ++ ++ if (mdwrite_hook) ++ mdwrite_hook(reln->smgr_rnode, forknum, blocknum); + } + + /* +diff --git a/src/backend/storage/sync/sync.c b/src/backend/storage/sync/sync.c +index 3ded2cdd71..3a596a59f7 100644 +--- a/src/backend/storage/sync/sync.c ++++ b/src/backend/storage/sync/sync.c +@@ -75,6 +75,8 @@ static MemoryContext pendingOpsCxt; /* context for the above */ + static CycleCtr sync_cycle_ctr = 0; + static CycleCtr checkpoint_cycle_ctr = 0; + ++ProcessSyncRequests_hook_type ProcessSyncRequests_hook = NULL; ++ + /* Intervals for calling AbsorbSyncRequests */ + #define FSYNCS_PER_ABSORB 10 + #define UNLINKS_PER_ABSORB 10 +@@ -420,6 +422,9 @@ ProcessSyncRequests(void) + CheckpointStats.ckpt_longest_sync = longest; + CheckpointStats.ckpt_agg_sync_time = total_elapsed; + ++ if (ProcessSyncRequests_hook) ++ ProcessSyncRequests_hook(); ++ + /* Flag successful completion of ProcessSyncRequests */ + sync_in_progress = false; + } +diff --git a/src/bin/pg_checksums/pg_checksums.c b/src/bin/pg_checksums/pg_checksums.c +index ffdc23945c..7ae95866ce 100644 +--- a/src/bin/pg_checksums/pg_checksums.c ++++ b/src/bin/pg_checksums/pg_checksums.c +@@ -114,6 +114,11 @@ static const struct exclude_list_item skip[] = { + {"pg_filenode.map", false}, + {"pg_internal.init", true}, + {"PG_VERSION", false}, ++ ++ {"ptrack.map.mmap", false}, ++ {"ptrack.map", false}, ++ {"ptrack.map.tmp", false}, ++ + #ifdef EXEC_BACKEND + {"config_exec_params", true}, + #endif 
+diff --git a/src/bin/pg_resetwal/pg_resetwal.c b/src/bin/pg_resetwal/pg_resetwal.c +index 233441837f..cf7bd073bf 100644 +--- a/src/bin/pg_resetwal/pg_resetwal.c ++++ b/src/bin/pg_resetwal/pg_resetwal.c +@@ -84,6 +84,7 @@ static void RewriteControlFile(void); + static void FindEndOfXLOG(void); + static void KillExistingXLOG(void); + static void KillExistingArchiveStatus(void); ++static void KillExistingPtrack(void); + static void WriteEmptyXLOG(void); + static void usage(void); + +@@ -513,6 +514,7 @@ main(int argc, char *argv[]) + RewriteControlFile(); + KillExistingXLOG(); + KillExistingArchiveStatus(); ++ KillExistingPtrack(); + WriteEmptyXLOG(); + + printf(_("Write-ahead log reset\n")); +@@ -1102,6 +1104,53 @@ KillExistingArchiveStatus(void) + } + } + ++/* ++ * Remove existing ptrack files ++ */ ++static void ++KillExistingPtrack(void) ++{ ++#define PTRACKDIR "global" ++ ++ DIR *xldir; ++ struct dirent *xlde; ++ char path[MAXPGPATH + sizeof(PTRACKDIR)]; ++ ++ xldir = opendir(PTRACKDIR); ++ if (xldir == NULL) ++ { ++ pg_log_error("could not open directory \"%s\": %m", PTRACKDIR); ++ exit(1); ++ } ++ ++ while (errno = 0, (xlde = readdir(xldir)) != NULL) ++ { ++ if (strcmp(xlde->d_name, "ptrack.map.mmap") == 0 || ++ strcmp(xlde->d_name, "ptrack.map") == 0 || ++ strcmp(xlde->d_name, "ptrack.map.tmp") == 0) ++ { ++ snprintf(path, sizeof(path), "%s/%s", PTRACKDIR, xlde->d_name); ++ if (unlink(path) < 0) ++ { ++ pg_log_error("could not delete file \"%s\": %m", path); ++ exit(1); ++ } ++ } ++ } ++ ++ if (errno) ++ { ++ pg_log_error("could not read directory \"%s\": %m", PTRACKDIR); ++ exit(1); ++ } ++ ++ if (closedir(xldir)) ++ { ++ pg_log_error("could not close directory \"%s\": %m", PTRACKDIR); ++ exit(1); ++ } ++} ++ + + /* + * Write an empty XLOG file, containing only the checkpoint record +diff --git a/src/bin/pg_rewind/filemap.c b/src/bin/pg_rewind/filemap.c +index fbb97b5cf1..6cd7f2ae3e 100644 +--- a/src/bin/pg_rewind/filemap.c ++++ b/src/bin/pg_rewind/filemap.c 
+@@ -124,6 +124,10 @@ static const struct exclude_list_item excludeFiles[] = + {"postmaster.pid", false}, + {"postmaster.opts", false}, + ++ {"ptrack.map.mmap", false}, ++ {"ptrack.map", false}, ++ {"ptrack.map.tmp", false}, ++ + /* end of list */ + {NULL, false} + }; +diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h +index 72e3352398..5c2e016501 100644 +--- a/src/include/miscadmin.h ++++ b/src/include/miscadmin.h +@@ -388,7 +388,7 @@ typedef enum ProcessingMode + NormalProcessing /* normal processing */ + } ProcessingMode; + +-extern ProcessingMode Mode; ++extern PGDLLIMPORT ProcessingMode Mode; + + #define IsBootstrapProcessingMode() (Mode == BootstrapProcessing) + #define IsInitProcessingMode() (Mode == InitProcessing) +diff --git a/src/include/storage/copydir.h b/src/include/storage/copydir.h +index 5d28f59c1d..0d3f04d8af 100644 +--- a/src/include/storage/copydir.h ++++ b/src/include/storage/copydir.h +@@ -13,6 +13,9 @@ + #ifndef COPYDIR_H + #define COPYDIR_H + ++typedef void (*copydir_hook_type) (const char *path); ++extern PGDLLIMPORT copydir_hook_type copydir_hook; ++ + extern void copydir(char *fromdir, char *todir, bool recurse); + extern void copy_file(char *fromfile, char *tofile); + +diff --git a/src/include/storage/md.h b/src/include/storage/md.h +index 07fd1bb7d0..5294811bc8 100644 +--- a/src/include/storage/md.h ++++ b/src/include/storage/md.h +@@ -19,6 +19,13 @@ + #include "storage/smgr.h" + #include "storage/sync.h" + ++typedef void (*mdextend_hook_type) (RelFileNodeBackend smgr_rnode, ++ ForkNumber forknum, BlockNumber blocknum); ++extern PGDLLIMPORT mdextend_hook_type mdextend_hook; ++typedef void (*mdwrite_hook_type) (RelFileNodeBackend smgr_rnode, ++ ForkNumber forknum, BlockNumber blocknum); ++extern PGDLLIMPORT mdwrite_hook_type mdwrite_hook; ++ + /* md storage manager functionality */ + extern void mdinit(void); + extern void mdopen(SMgrRelation reln); +diff --git a/src/include/storage/sync.h b/src/include/storage/sync.h +index 
e16ab8e711..88da9686eb 100644 +--- a/src/include/storage/sync.h ++++ b/src/include/storage/sync.h +@@ -50,6 +50,9 @@ typedef struct FileTag + uint32 segno; + } FileTag; + ++typedef void (*ProcessSyncRequests_hook_type) (void); ++extern PGDLLIMPORT ProcessSyncRequests_hook_type ProcessSyncRequests_hook; ++ + extern void InitSync(void); + extern void SyncPreCheckpoint(void); + extern void SyncPostCheckpoint(void); +diff --git a/src/tools/msvc/Mkvcbuild.pm b/src/tools/msvc/Mkvcbuild.pm +index 9b6539fb15d..4b2bcdb6b88 100644 +--- a/src/tools/msvc/Mkvcbuild.pm ++++ b/src/tools/msvc/Mkvcbuild.pm +@@ -38,7 +38,7 @@ my @unlink_on_exit; + my $contrib_defines = { 'refint' => 'REFINT_VERBOSE' }; + my @contrib_uselibpq = + ('dblink', 'oid2name', 'postgres_fdw', 'vacuumlo', 'libpq_pipeline'); +-my @contrib_uselibpgport = ('libpq_pipeline', 'oid2name', 'vacuumlo'); ++my @contrib_uselibpgport = ('libpq_pipeline', 'oid2name', 'vacuumlo', 'ptrack'); + my @contrib_uselibpgcommon = ('libpq_pipeline', 'oid2name', 'vacuumlo'); + my $contrib_extralibs = { 'libpq_pipeline' => ['ws2_32.lib'] }; + my $contrib_extraincludes = { 'dblink' => ['src/backend'] }; diff --git a/patches/REL_15_STABLE-ptrack-core.diff b/patches/REL_15_STABLE-ptrack-core.diff new file mode 100644 index 0000000..2adc5f3 --- /dev/null +++ b/patches/REL_15_STABLE-ptrack-core.diff @@ -0,0 +1,248 @@ +diff --git a/src/backend/backup/basebackup.c b/src/backend/backup/basebackup.c +index cc16c4b331f..69b1af16cf5 100644 +--- a/src/backend/backup/basebackup.c ++++ b/src/backend/backup/basebackup.c +@@ -197,6 +197,13 @@ static const struct exclude_list_item excludeFiles[] = + {"postmaster.pid", false}, + {"postmaster.opts", false}, + ++ /* ++ * Skip all transient ptrack files, but do copy ptrack.map, since it may ++ * be successfully used immediately after backup. TODO: check, test? 
++ */ ++ {"ptrack.map.mmap", false}, ++ {"ptrack.map.tmp", false}, ++ + /* end of list */ + {NULL, false} + }; +@@ -212,6 +219,11 @@ static const struct exclude_list_item noChecksumFiles[] = { + {"pg_filenode.map", false}, + {"pg_internal.init", true}, + {"PG_VERSION", false}, ++ ++ {"ptrack.map.mmap", false}, ++ {"ptrack.map", false}, ++ {"ptrack.map.tmp", false}, ++ + #ifdef EXEC_BACKEND + {"config_exec_params", true}, + #endif +diff --git a/src/backend/storage/file/copydir.c b/src/backend/storage/file/copydir.c +index 658fd95ba95..eee38eba176 100644 +--- a/src/backend/storage/file/copydir.c ++++ b/src/backend/storage/file/copydir.c +@@ -27,6 +27,8 @@ + #include "storage/copydir.h" + #include "storage/fd.h" + ++copydir_hook_type copydir_hook = NULL; ++ + /* + * copydir: copy a directory + * +@@ -78,6 +80,9 @@ copydir(char *fromdir, char *todir, bool recurse) + } + FreeDir(xldir); + ++ if (copydir_hook) ++ copydir_hook(todir); ++ + /* + * Be paranoid here and fsync all files to ensure the copy is really done. + * But if fsync is disabled, we're done. +diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c +index a0fc60b32a3..7f091951c0d 100644 +--- a/src/backend/storage/smgr/md.c ++++ b/src/backend/storage/smgr/md.c +@@ -87,6 +87,8 @@ typedef struct _MdfdVec + + static MemoryContext MdCxt; /* context for all MdfdVec objects */ + ++mdextend_hook_type mdextend_hook = NULL; ++mdwrite_hook_type mdwrite_hook = NULL; + + /* Populate a file tag describing an md.c segment file. 
*/ + #define INIT_MD_FILETAG(a,xx_rnode,xx_forknum,xx_segno) \ +@@ -484,6 +486,9 @@ mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + register_dirty_segment(reln, forknum, v); + + Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE)); ++ ++ if (mdextend_hook) ++ mdextend_hook(reln->smgr_rnode, forknum, blocknum); + } + + /* +@@ -773,6 +778,9 @@ mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + + if (!skipFsync && !SmgrIsTemp(reln)) + register_dirty_segment(reln, forknum, v); ++ ++ if (mdwrite_hook) ++ mdwrite_hook(reln->smgr_rnode, forknum, blocknum); + } + + /* +diff --git a/src/backend/storage/sync/sync.c b/src/backend/storage/sync/sync.c +index e1fb6310038..76d75680b31 100644 +--- a/src/backend/storage/sync/sync.c ++++ b/src/backend/storage/sync/sync.c +@@ -81,6 +81,8 @@ static MemoryContext pendingOpsCxt; /* context for the above */ + static CycleCtr sync_cycle_ctr = 0; + static CycleCtr checkpoint_cycle_ctr = 0; + ++ProcessSyncRequests_hook_type ProcessSyncRequests_hook = NULL; ++ + /* Intervals for calling AbsorbSyncRequests */ + #define FSYNCS_PER_ABSORB 10 + #define UNLINKS_PER_ABSORB 10 +@@ -477,6 +479,9 @@ ProcessSyncRequests(void) + CheckpointStats.ckpt_longest_sync = longest; + CheckpointStats.ckpt_agg_sync_time = total_elapsed; + ++ if (ProcessSyncRequests_hook) ++ ProcessSyncRequests_hook(); ++ + /* Flag successful completion of ProcessSyncRequests */ + sync_in_progress = false; + } +diff --git a/src/bin/pg_checksums/pg_checksums.c b/src/bin/pg_checksums/pg_checksums.c +index 21dfe1b6ee5..266ac1ef40a 100644 +--- a/src/bin/pg_checksums/pg_checksums.c ++++ b/src/bin/pg_checksums/pg_checksums.c +@@ -118,6 +118,11 @@ static const struct exclude_list_item skip[] = { + {"pg_filenode.map", false}, + {"pg_internal.init", true}, + {"PG_VERSION", false}, ++ ++ {"ptrack.map.mmap", false}, ++ {"ptrack.map", false}, ++ {"ptrack.map.tmp", false}, ++ + #ifdef EXEC_BACKEND + {"config_exec_params", true}, + 
#endif +diff --git a/src/bin/pg_resetwal/pg_resetwal.c b/src/bin/pg_resetwal/pg_resetwal.c +index d4772a29650..3318f64359d 100644 +--- a/src/bin/pg_resetwal/pg_resetwal.c ++++ b/src/bin/pg_resetwal/pg_resetwal.c +@@ -85,6 +85,7 @@ static void RewriteControlFile(void); + static void FindEndOfXLOG(void); + static void KillExistingXLOG(void); + static void KillExistingArchiveStatus(void); ++static void KillExistingPtrack(void); + static void WriteEmptyXLOG(void); + static void usage(void); + +@@ -488,6 +489,7 @@ main(int argc, char *argv[]) + RewriteControlFile(); + KillExistingXLOG(); + KillExistingArchiveStatus(); ++ KillExistingPtrack(); + WriteEmptyXLOG(); + + printf(_("Write-ahead log reset\n")); +@@ -1036,6 +1038,41 @@ KillExistingArchiveStatus(void) + pg_fatal("could not close directory \"%s\": %m", ARCHSTATDIR); + } + ++/* ++ * Remove existing ptrack files ++ */ ++static void ++KillExistingPtrack(void) ++{ ++#define PTRACKDIR "global" ++ ++ DIR *xldir; ++ struct dirent *xlde; ++ char path[MAXPGPATH + sizeof(PTRACKDIR)]; ++ ++ xldir = opendir(PTRACKDIR); ++ if (xldir == NULL) ++ pg_fatal("could not open directory \"%s\": %m", PTRACKDIR); ++ ++ while (errno = 0, (xlde = readdir(xldir)) != NULL) ++ { ++ if (strcmp(xlde->d_name, "ptrack.map.mmap") == 0 || ++ strcmp(xlde->d_name, "ptrack.map") == 0 || ++ strcmp(xlde->d_name, "ptrack.map.tmp") == 0) ++ { ++ snprintf(path, sizeof(path), "%s/%s", PTRACKDIR, xlde->d_name); ++ if (unlink(path) < 0) ++ pg_fatal("could not delete file \"%s\": %m", path); ++ } ++ } ++ ++ if (errno) ++ pg_fatal("could not read directory \"%s\": %m", PTRACKDIR); ++ ++ if (closedir(xldir)) ++ pg_fatal("could not close directory \"%s\": %m", PTRACKDIR); ++} ++ + + /* + * Write an empty XLOG file, containing only the checkpoint record +diff --git a/src/bin/pg_rewind/filemap.c b/src/bin/pg_rewind/filemap.c +index 62529310415..b496f54fb06 100644 +--- a/src/bin/pg_rewind/filemap.c ++++ b/src/bin/pg_rewind/filemap.c +@@ -157,6 +157,10 @@ static 
const struct exclude_list_item excludeFiles[] = + {"postmaster.pid", false}, + {"postmaster.opts", false}, + ++ {"ptrack.map.mmap", false}, ++ {"ptrack.map", false}, ++ {"ptrack.map.tmp", false}, ++ + /* end of list */ + {NULL, false} + }; +diff --git a/src/include/storage/copydir.h b/src/include/storage/copydir.h +index 50a26edeb06..af1602f5154 100644 +--- a/src/include/storage/copydir.h ++++ b/src/include/storage/copydir.h +@@ -13,6 +13,9 @@ + #ifndef COPYDIR_H + #define COPYDIR_H + ++typedef void (*copydir_hook_type) (const char *path); ++extern PGDLLIMPORT copydir_hook_type copydir_hook; ++ + extern void copydir(char *fromdir, char *todir, bool recurse); + extern void copy_file(char *fromfile, char *tofile); + +diff --git a/src/include/storage/md.h b/src/include/storage/md.h +index ffffa40db71..3ff98e0bf01 100644 +--- a/src/include/storage/md.h ++++ b/src/include/storage/md.h +@@ -19,6 +19,13 @@ + #include "storage/smgr.h" + #include "storage/sync.h" + ++typedef void (*mdextend_hook_type) (RelFileNodeBackend smgr_rnode, ++ ForkNumber forknum, BlockNumber blocknum); ++extern PGDLLIMPORT mdextend_hook_type mdextend_hook; ++typedef void (*mdwrite_hook_type) (RelFileNodeBackend smgr_rnode, ++ ForkNumber forknum, BlockNumber blocknum); ++extern PGDLLIMPORT mdwrite_hook_type mdwrite_hook; ++ + /* md storage manager functionality */ + extern void mdinit(void); + extern void mdopen(SMgrRelation reln); +diff --git a/src/include/storage/sync.h b/src/include/storage/sync.h +index 9737e1eb67c..914ad86328f 100644 +--- a/src/include/storage/sync.h ++++ b/src/include/storage/sync.h +@@ -55,6 +55,9 @@ typedef struct FileTag + uint32 segno; + } FileTag; + ++typedef void (*ProcessSyncRequests_hook_type) (void); ++extern PGDLLIMPORT ProcessSyncRequests_hook_type ProcessSyncRequests_hook; ++ + extern void InitSync(void); + extern void SyncPreCheckpoint(void); + extern void SyncPostCheckpoint(void); diff --git a/patches/REL_16_STABLE-ptrack-core.diff 
b/patches/REL_16_STABLE-ptrack-core.diff new file mode 100644 index 0000000..04cf8a4 --- /dev/null +++ b/patches/REL_16_STABLE-ptrack-core.diff @@ -0,0 +1,261 @@ +diff --git a/src/backend/backup/basebackup.c b/src/backend/backup/basebackup.c +index 45be21131c5..134e677f9d1 100644 +--- a/src/backend/backup/basebackup.c ++++ b/src/backend/backup/basebackup.c +@@ -199,6 +199,13 @@ static const struct exclude_list_item excludeFiles[] = + {"postmaster.pid", false}, + {"postmaster.opts", false}, + ++ /* ++ * Skip all transient ptrack files, but do copy ptrack.map, since it may ++ * be successfully used immediately after backup. TODO: check, test? ++ */ ++ {"ptrack.map.mmap", false}, ++ {"ptrack.map.tmp", false}, ++ + /* end of list */ + {NULL, false} + }; +@@ -214,6 +221,11 @@ static const struct exclude_list_item noChecksumFiles[] = { + {"pg_filenode.map", false}, + {"pg_internal.init", true}, + {"PG_VERSION", false}, ++ ++ {"ptrack.map.mmap", false}, ++ {"ptrack.map", false}, ++ {"ptrack.map.tmp", false}, ++ + #ifdef EXEC_BACKEND + {"config_exec_params", true}, + #endif +diff --git a/src/backend/storage/file/copydir.c b/src/backend/storage/file/copydir.c +index e04bc3941ae..996b5de6169 100644 +--- a/src/backend/storage/file/copydir.c ++++ b/src/backend/storage/file/copydir.c +@@ -27,6 +27,8 @@ + #include "storage/copydir.h" + #include "storage/fd.h" + ++copydir_hook_type copydir_hook = NULL; ++ + /* + * copydir: copy a directory + * +@@ -75,6 +77,9 @@ copydir(const char *fromdir, const char *todir, bool recurse) + } + FreeDir(xldir); + ++ if (copydir_hook) ++ copydir_hook(todir); ++ + /* + * Be paranoid here and fsync all files to ensure the copy is really done. + * But if fsync is disabled, we're done. 
+diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c +index fdecbad1709..f849d00161e 100644 +--- a/src/backend/storage/smgr/md.c ++++ b/src/backend/storage/smgr/md.c +@@ -87,6 +87,8 @@ typedef struct _MdfdVec + + static MemoryContext MdCxt; /* context for all MdfdVec objects */ + ++mdextend_hook_type mdextend_hook = NULL; ++mdwrite_hook_type mdwrite_hook = NULL; + + /* Populate a file tag describing an md.c segment file. */ + #define INIT_MD_FILETAG(a,xx_rlocator,xx_forknum,xx_segno) \ +@@ -515,6 +517,9 @@ mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + register_dirty_segment(reln, forknum, v); + + Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE)); ++ ++ if (mdextend_hook) ++ mdextend_hook(reln->smgr_rlocator, forknum, blocknum); + } + + /* +@@ -622,6 +627,12 @@ mdzeroextend(SMgrRelation reln, ForkNumber forknum, + + remblocks -= numblocks; + curblocknum += numblocks; ++ ++ if (mdextend_hook) ++ { ++ for (; blocknum < curblocknum; blocknum++) ++ mdextend_hook(reln->smgr_rlocator, forknum, blocknum); ++ } + } + } + +@@ -867,6 +878,9 @@ mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + + if (!skipFsync && !SmgrIsTemp(reln)) + register_dirty_segment(reln, forknum, v); ++ ++ if (mdwrite_hook) ++ mdwrite_hook(reln->smgr_rlocator, forknum, blocknum); + } + + /* +diff --git a/src/backend/storage/sync/sync.c b/src/backend/storage/sync/sync.c +index 04fcb06056d..22bf179f560 100644 +--- a/src/backend/storage/sync/sync.c ++++ b/src/backend/storage/sync/sync.c +@@ -79,6 +79,8 @@ static MemoryContext pendingOpsCxt; /* context for the above */ + static CycleCtr sync_cycle_ctr = 0; + static CycleCtr checkpoint_cycle_ctr = 0; + ++ProcessSyncRequests_hook_type ProcessSyncRequests_hook = NULL; ++ + /* Intervals for calling AbsorbSyncRequests */ + #define FSYNCS_PER_ABSORB 10 + #define UNLINKS_PER_ABSORB 10 +@@ -475,6 +477,9 @@ ProcessSyncRequests(void) + CheckpointStats.ckpt_longest_sync = longest; 
+ CheckpointStats.ckpt_agg_sync_time = total_elapsed; + ++ if (ProcessSyncRequests_hook) ++ ProcessSyncRequests_hook(); ++ + /* Flag successful completion of ProcessSyncRequests */ + sync_in_progress = false; + } +diff --git a/src/bin/pg_checksums/pg_checksums.c b/src/bin/pg_checksums/pg_checksums.c +index 19eb67e4854..008a7acc9f0 100644 +--- a/src/bin/pg_checksums/pg_checksums.c ++++ b/src/bin/pg_checksums/pg_checksums.c +@@ -118,6 +118,11 @@ static const struct exclude_list_item skip[] = { + {"pg_filenode.map", false}, + {"pg_internal.init", true}, + {"PG_VERSION", false}, ++ ++ {"ptrack.map.mmap", false}, ++ {"ptrack.map", false}, ++ {"ptrack.map.tmp", false}, ++ + #ifdef EXEC_BACKEND + {"config_exec_params", true}, + #endif +diff --git a/src/bin/pg_resetwal/pg_resetwal.c b/src/bin/pg_resetwal/pg_resetwal.c +index e7ef2b8bd0c..ca7f8cdbc2f 100644 +--- a/src/bin/pg_resetwal/pg_resetwal.c ++++ b/src/bin/pg_resetwal/pg_resetwal.c +@@ -85,6 +85,7 @@ static void RewriteControlFile(void); + static void FindEndOfXLOG(void); + static void KillExistingXLOG(void); + static void KillExistingArchiveStatus(void); ++static void KillExistingPtrack(void); + static void WriteEmptyXLOG(void); + static void usage(void); + +@@ -488,6 +489,7 @@ main(int argc, char *argv[]) + RewriteControlFile(); + KillExistingXLOG(); + KillExistingArchiveStatus(); ++ KillExistingPtrack(); + WriteEmptyXLOG(); + + printf(_("Write-ahead log reset\n")); +@@ -1029,6 +1031,41 @@ KillExistingArchiveStatus(void) + pg_fatal("could not close directory \"%s\": %m", ARCHSTATDIR); + } + ++/* ++ * Remove existing ptrack files ++ */ ++static void ++KillExistingPtrack(void) ++{ ++#define PTRACKDIR "global" ++ ++ DIR *xldir; ++ struct dirent *xlde; ++ char path[MAXPGPATH + sizeof(PTRACKDIR)]; ++ ++ xldir = opendir(PTRACKDIR); ++ if (xldir == NULL) ++ pg_fatal("could not open directory \"%s\": %m", PTRACKDIR); ++ ++ while (errno = 0, (xlde = readdir(xldir)) != NULL) ++ { ++ if (strcmp(xlde->d_name, "ptrack.map.mmap") 
== 0 || ++ strcmp(xlde->d_name, "ptrack.map") == 0 || ++ strcmp(xlde->d_name, "ptrack.map.tmp") == 0) ++ { ++ snprintf(path, sizeof(path), "%s/%s", PTRACKDIR, xlde->d_name); ++ if (unlink(path) < 0) ++ pg_fatal("could not delete file \"%s\": %m", path); ++ } ++ } ++ ++ if (errno) ++ pg_fatal("could not read directory \"%s\": %m", PTRACKDIR); ++ ++ if (closedir(xldir)) ++ pg_fatal("could not close directory \"%s\": %m", PTRACKDIR); ++} ++ + + /* + * Write an empty XLOG file, containing only the checkpoint record +diff --git a/src/bin/pg_rewind/filemap.c b/src/bin/pg_rewind/filemap.c +index bd5c598e200..a568156c5fb 100644 +--- a/src/bin/pg_rewind/filemap.c ++++ b/src/bin/pg_rewind/filemap.c +@@ -157,6 +157,10 @@ static const struct exclude_list_item excludeFiles[] = + {"postmaster.pid", false}, + {"postmaster.opts", false}, + ++ {"ptrack.map.mmap", false}, ++ {"ptrack.map", false}, ++ {"ptrack.map.tmp", false}, ++ + /* end of list */ + {NULL, false} + }; +diff --git a/src/include/storage/copydir.h b/src/include/storage/copydir.h +index a8be5b21e0b..020874f96cd 100644 +--- a/src/include/storage/copydir.h ++++ b/src/include/storage/copydir.h +@@ -13,6 +13,9 @@ + #ifndef COPYDIR_H + #define COPYDIR_H + ++typedef void (*copydir_hook_type) (const char *path); ++extern PGDLLIMPORT copydir_hook_type copydir_hook; ++ + extern void copydir(const char *fromdir, const char *todir, bool recurse); + extern void copy_file(const char *fromfile, const char *tofile); + +diff --git a/src/include/storage/md.h b/src/include/storage/md.h +index 941879ee6a8..24738aeecd0 100644 +--- a/src/include/storage/md.h ++++ b/src/include/storage/md.h +@@ -19,6 +19,13 @@ + #include "storage/smgr.h" + #include "storage/sync.h" + ++typedef void (*mdextend_hook_type) (RelFileLocatorBackend smgr_rlocator, ++ ForkNumber forknum, BlockNumber blocknum); ++extern PGDLLIMPORT mdextend_hook_type mdextend_hook; ++typedef void (*mdwrite_hook_type) (RelFileLocatorBackend smgr_rlocator, ++ ForkNumber forknum, 
BlockNumber blocknum); ++extern PGDLLIMPORT mdwrite_hook_type mdwrite_hook; ++ + /* md storage manager functionality */ + extern void mdinit(void); + extern void mdopen(SMgrRelation reln); +diff --git a/src/include/storage/sync.h b/src/include/storage/sync.h +index cfbcfa6797d..2a432440db9 100644 +--- a/src/include/storage/sync.h ++++ b/src/include/storage/sync.h +@@ -55,6 +55,9 @@ typedef struct FileTag + uint32 segno; + } FileTag; + ++typedef void (*ProcessSyncRequests_hook_type) (void); ++extern PGDLLIMPORT ProcessSyncRequests_hook_type ProcessSyncRequests_hook; ++ + extern void InitSync(void); + extern void SyncPreCheckpoint(void); + extern void SyncPostCheckpoint(void); diff --git a/patches/master-ptrack-core.diff b/patches/master-ptrack-core.diff new file mode 100644 index 0000000..3357a2b --- /dev/null +++ b/patches/master-ptrack-core.diff @@ -0,0 +1,294 @@ +diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c +index 34a2c71812..2d73d8023e 100644 +--- a/src/backend/access/transam/xlog.c ++++ b/src/backend/access/transam/xlog.c +@@ -135,6 +135,7 @@ int wal_retrieve_retry_interval = 5000; + int max_slot_wal_keep_size_mb = -1; + int wal_decode_buffer_size = 512 * 1024; + bool track_wal_io_timing = false; ++backup_checkpoint_request_hook_type backup_checkpoint_request_hook = NULL; + + #ifdef WAL_DEBUG + bool XLOG_DEBUG = false; +@@ -8801,6 +8802,12 @@ do_pg_backup_start(const char *backupidstr, bool fast, List **tablespaces, + { + bool checkpointfpw; + ++ /* ++ * Before we call RequestCheckpoint() we need to set ++ * init_lsn for ptrack map ++ */ ++ if (backup_checkpoint_request_hook) ++ backup_checkpoint_request_hook(); + /* + * Force a CHECKPOINT. 
Aside from being necessary to prevent torn + * page problems, this guarantees that two successive backup runs +diff --git a/src/backend/backup/basebackup.c b/src/backend/backup/basebackup.c +index 9a2bf59e84..ade9115651 100644 +--- a/src/backend/backup/basebackup.c ++++ b/src/backend/backup/basebackup.c +@@ -220,6 +220,13 @@ static const struct exclude_list_item excludeFiles[] = + {"postmaster.pid", false}, + {"postmaster.opts", false}, + ++ /* ++ * Skip all transient ptrack files, but do copy ptrack.map, since it may ++ * be successfully used immediately after backup. TODO: check, test? ++ */ ++ {"ptrack.map.mmap", false}, ++ {"ptrack.map.tmp", false}, ++ + /* end of list */ + {NULL, false} + }; +diff --git a/src/backend/storage/file/copydir.c b/src/backend/storage/file/copydir.c +index d4fbe54207..b108416c70 100644 +--- a/src/backend/storage/file/copydir.c ++++ b/src/backend/storage/file/copydir.c +@@ -27,6 +27,8 @@ + #include "storage/copydir.h" + #include "storage/fd.h" + ++copydir_hook_type copydir_hook = NULL; ++ + /* + * copydir: copy a directory + * +@@ -75,6 +77,9 @@ copydir(const char *fromdir, const char *todir, bool recurse) + } + FreeDir(xldir); + ++ if (copydir_hook) ++ copydir_hook(todir); ++ + /* + * Be paranoid here and fsync all files to ensure the copy is really done. + * But if fsync is disabled, we're done. +diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c +index bf0f3ca76d..7d9833a360 100644 +--- a/src/backend/storage/smgr/md.c ++++ b/src/backend/storage/smgr/md.c +@@ -85,6 +85,8 @@ typedef struct _MdfdVec + + static MemoryContext MdCxt; /* context for all MdfdVec objects */ + ++mdextend_hook_type mdextend_hook = NULL; ++mdwrite_hook_type mdwrite_hook = NULL; + + /* Populate a file tag describing an md.c segment file. 
*/ + #define INIT_MD_FILETAG(a,xx_rlocator,xx_forknum,xx_segno) \ +@@ -513,6 +515,9 @@ mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + register_dirty_segment(reln, forknum, v); + + Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE)); ++ ++ if (mdextend_hook) ++ mdextend_hook(reln->smgr_rlocator, forknum, blocknum); + } + + /* +@@ -620,6 +625,12 @@ mdzeroextend(SMgrRelation reln, ForkNumber forknum, + + remblocks -= numblocks; + curblocknum += numblocks; ++ ++ if (mdextend_hook) ++ { ++ for (; blocknum < curblocknum; blocknum++) ++ mdextend_hook(reln->smgr_rlocator, forknum, blocknum); ++ } + } + } + +@@ -1015,7 +1026,14 @@ mdwritev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + + nblocks -= nblocks_this_segment; + buffers += nblocks_this_segment; +- blocknum += nblocks_this_segment; ++ ++ if (mdwrite_hook) ++ { ++ for (; nblocks_this_segment--; blocknum++) ++ mdwrite_hook(reln->smgr_rlocator, forknum, blocknum); ++ } ++ else ++ blocknum += nblocks_this_segment; + } + } + +diff --git a/src/backend/storage/sync/sync.c b/src/backend/storage/sync/sync.c +index ab7137d0ff..bc40a763c0 100644 +--- a/src/backend/storage/sync/sync.c ++++ b/src/backend/storage/sync/sync.c +@@ -74,6 +74,8 @@ static MemoryContext pendingOpsCxt; /* context for the above */ + static CycleCtr sync_cycle_ctr = 0; + static CycleCtr checkpoint_cycle_ctr = 0; + ++ProcessSyncRequests_hook_type ProcessSyncRequests_hook = NULL; ++ + /* Intervals for calling AbsorbSyncRequests */ + #define FSYNCS_PER_ABSORB 10 + #define UNLINKS_PER_ABSORB 10 +@@ -470,6 +472,9 @@ ProcessSyncRequests(void) + CheckpointStats.ckpt_longest_sync = longest; + CheckpointStats.ckpt_agg_sync_time = total_elapsed; + ++ if (ProcessSyncRequests_hook) ++ ProcessSyncRequests_hook(); ++ + /* Flag successful completion of ProcessSyncRequests */ + sync_in_progress = false; + } +diff --git a/src/bin/pg_checksums/pg_checksums.c b/src/bin/pg_checksums/pg_checksums.c +index 
9e6fd435f6..f2180b9f6d 100644 +--- a/src/bin/pg_checksums/pg_checksums.c ++++ b/src/bin/pg_checksums/pg_checksums.c +@@ -110,6 +110,11 @@ static const struct exclude_list_item skip[] = { + {"pg_filenode.map", false}, + {"pg_internal.init", true}, + {"PG_VERSION", false}, ++ ++ {"ptrack.map.mmap", false}, ++ {"ptrack.map", false}, ++ {"ptrack.map.tmp", false}, ++ + #ifdef EXEC_BACKEND + {"config_exec_params", true}, + #endif +diff --git a/src/bin/pg_resetwal/pg_resetwal.c b/src/bin/pg_resetwal/pg_resetwal.c +index e9dcb5a6d8..844b04d5e1 100644 +--- a/src/bin/pg_resetwal/pg_resetwal.c ++++ b/src/bin/pg_resetwal/pg_resetwal.c +@@ -86,6 +86,7 @@ static void FindEndOfXLOG(void); + static void KillExistingXLOG(void); + static void KillExistingArchiveStatus(void); + static void KillExistingWALSummaries(void); ++static void KillExistingPtrack(void); + static void WriteEmptyXLOG(void); + static void usage(void); + +@@ -495,6 +496,7 @@ main(int argc, char *argv[]) + KillExistingXLOG(); + KillExistingArchiveStatus(); + KillExistingWALSummaries(); ++ KillExistingPtrack(); + WriteEmptyXLOG(); + + printf(_("Write-ahead log reset\n")); +@@ -998,6 +1000,41 @@ KillExistingXLOG(void) + pg_fatal("could not close directory \"%s\": %m", XLOGDIR); + } + ++/* ++ * Remove existing ptrack files ++ */ ++static void ++KillExistingPtrack(void) ++{ ++#define PTRACKDIR "global" ++ ++ DIR *xldir; ++ struct dirent *xlde; ++ char path[MAXPGPATH + sizeof(PTRACKDIR)]; ++ ++ xldir = opendir(PTRACKDIR); ++ if (xldir == NULL) ++ pg_fatal("could not open directory \"%s\": %m", PTRACKDIR); ++ ++ while (errno = 0, (xlde = readdir(xldir)) != NULL) ++ { ++ if (strcmp(xlde->d_name, "ptrack.map.mmap") == 0 || ++ strcmp(xlde->d_name, "ptrack.map") == 0 || ++ strcmp(xlde->d_name, "ptrack.map.tmp") == 0) ++ { ++ snprintf(path, sizeof(path), "%s/%s", PTRACKDIR, xlde->d_name); ++ if (unlink(path) < 0) ++ pg_fatal("could not delete file \"%s\": %m", path); ++ } ++ } ++ ++ if (errno) ++ pg_fatal("could not read 
directory \"%s\": %m", PTRACKDIR); ++ ++ if (closedir(xldir)) ++ pg_fatal("could not close directory \"%s\": %m", PTRACKDIR); ++} ++ + + /* + * Remove existing archive status files +diff --git a/src/bin/pg_rewind/filemap.c b/src/bin/pg_rewind/filemap.c +index 4458324c9d..7d857467f7 100644 +--- a/src/bin/pg_rewind/filemap.c ++++ b/src/bin/pg_rewind/filemap.c +@@ -156,6 +156,10 @@ static const struct exclude_list_item excludeFiles[] = + {"postmaster.pid", false}, + {"postmaster.opts", false}, + ++ {"ptrack.map.mmap", false}, ++ {"ptrack.map", false}, ++ {"ptrack.map.tmp", false}, ++ + /* end of list */ + {NULL, false} + }; +diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h +index 76787a8267..2c662f4022 100644 +--- a/src/include/access/xlog.h ++++ b/src/include/access/xlog.h +@@ -57,6 +57,9 @@ extern PGDLLIMPORT int wal_decode_buffer_size; + + extern PGDLLIMPORT int CheckPointSegments; + ++typedef void (*backup_checkpoint_request_hook_type) (void); ++extern PGDLLIMPORT backup_checkpoint_request_hook_type backup_checkpoint_request_hook; ++ + /* Archive modes */ + typedef enum ArchiveMode + { +diff --git a/src/include/storage/copydir.h b/src/include/storage/copydir.h +index a25e258f47..b20b9c76e8 100644 +--- a/src/include/storage/copydir.h ++++ b/src/include/storage/copydir.h +@@ -13,6 +13,9 @@ + #ifndef COPYDIR_H + #define COPYDIR_H + ++typedef void (*copydir_hook_type) (const char *path); ++extern PGDLLIMPORT copydir_hook_type copydir_hook; ++ + extern void copydir(const char *fromdir, const char *todir, bool recurse); + extern void copy_file(const char *fromfile, const char *tofile); + +diff --git a/src/include/storage/md.h b/src/include/storage/md.h +index 620f10abde..b36936871b 100644 +--- a/src/include/storage/md.h ++++ b/src/include/storage/md.h +@@ -19,6 +19,13 @@ + #include "storage/smgr.h" + #include "storage/sync.h" + ++typedef void (*mdextend_hook_type) (RelFileLocatorBackend smgr_rlocator, ++ ForkNumber forknum, BlockNumber blocknum); 
++extern PGDLLIMPORT mdextend_hook_type mdextend_hook; ++typedef void (*mdwrite_hook_type) (RelFileLocatorBackend smgr_rlocator, ++ ForkNumber forknum, BlockNumber blocknum); ++extern PGDLLIMPORT mdwrite_hook_type mdwrite_hook; ++ + /* md storage manager functionality */ + extern void mdinit(void); + extern void mdopen(SMgrRelation reln); +diff --git a/src/include/storage/sync.h b/src/include/storage/sync.h +index 9dee8fa6e5..348ed53e4e 100644 +--- a/src/include/storage/sync.h ++++ b/src/include/storage/sync.h +@@ -55,6 +55,9 @@ typedef struct FileTag + uint64 segno; + } FileTag; + ++typedef void (*ProcessSyncRequests_hook_type) (void); ++extern PGDLLIMPORT ProcessSyncRequests_hook_type ProcessSyncRequests_hook; ++ + extern void InitSync(void); + extern void SyncPreCheckpoint(void); + extern void SyncPostCheckpoint(void); diff --git a/patches/ptrack-2.0-core.diff b/patches/ptrack-2.0-core.diff deleted file mode 100644 index 5345d3c..0000000 --- a/patches/ptrack-2.0-core.diff +++ /dev/null @@ -1,989 +0,0 @@ -diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c -index 863f89f19d2..8f3143758ab 100644 ---- a/src/backend/commands/dbcommands.c -+++ b/src/backend/commands/dbcommands.c -@@ -56,6 +56,7 @@ - #include "storage/ipc.h" - #include "storage/md.h" - #include "storage/procarray.h" -+#include "storage/ptrack.h" - #include "storage/smgr.h" - #include "utils/acl.h" - #include "utils/builtins.h" -@@ -638,6 +639,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) - * We don't need to copy subdirectories - */ - copydir(srcpath, dstpath, false); -+ ptrack_walkdir(dstpath, dboid, dsttablespace); - - /* Record the filesystem change in XLOG */ - { -@@ -1276,6 +1278,7 @@ movedb(const char *dbname, const char *tblspcname) - * Copy files from the old tablespace to the new one - */ - copydir(src_dbpath, dst_dbpath, false); -+ ptrack_walkdir(dst_dbpath, db_id, dst_tblspcoid); - - /* - * Record the filesystem change in XLOG -@@ -2139,6 +2142,7 
@@ dbase_redo(XLogReaderState *record) - * We don't need to copy subdirectories - */ - copydir(src_path, dst_path, false); -+ ptrack_walkdir(dst_path, xlrec->db_id, xlrec->tablespace_id); - } - else if (info == XLOG_DBASE_DROP) - { -diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c -index a73893237a4..2a2e17fb33e 100644 ---- a/src/backend/replication/basebackup.c -+++ b/src/backend/replication/basebackup.c -@@ -36,6 +36,7 @@ - #include "storage/dsm_impl.h" - #include "storage/fd.h" - #include "storage/ipc.h" -+#include "storage/ptrack.h" - #include "storage/reinit.h" - #include "utils/builtins.h" - #include "utils/ps_status.h" -@@ -195,6 +196,10 @@ static const char *excludeFiles[] = - "postmaster.pid", - "postmaster.opts", - -+ "ptrack.map.mmap", -+ "ptrack.map", -+ "ptrack.map.tmp", -+ - /* end of list */ - NULL - }; -diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c -index 7332e6b5903..08693933e18 100644 ---- a/src/backend/storage/buffer/bufmgr.c -+++ b/src/backend/storage/buffer/bufmgr.c -@@ -688,7 +688,8 @@ ReadBufferWithoutRelcache(RelFileNode rnode, ForkNumber forkNum, - - SMgrRelation smgr = smgropen(rnode, InvalidBackendId); - -- Assert(InRecovery); -+ // XXX: required by ptrack -+ // Assert(InRecovery); - - return ReadBuffer_common(smgr, RELPERSISTENCE_PERMANENT, forkNum, blockNum, - mode, strategy, &hit); -diff --git a/src/backend/storage/smgr/Makefile b/src/backend/storage/smgr/Makefile -index e486b7c0d1c..3c8f15b1444 100644 ---- a/src/backend/storage/smgr/Makefile -+++ b/src/backend/storage/smgr/Makefile -@@ -12,6 +12,6 @@ subdir = src/backend/storage/smgr - top_builddir = ../../../.. 
- include $(top_builddir)/src/Makefile.global - --OBJS = md.o smgr.o -+OBJS = md.o smgr.o ptrack.o - - include $(top_srcdir)/src/backend/common.mk -diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c -index 050cee5f9a9..94a7a1f7a1c 100644 ---- a/src/backend/storage/smgr/md.c -+++ b/src/backend/storage/smgr/md.c -@@ -33,6 +33,7 @@ - #include "storage/fd.h" - #include "storage/bufmgr.h" - #include "storage/md.h" -+#include "storage/ptrack.h" - #include "storage/relfilenode.h" - #include "storage/smgr.h" - #include "storage/sync.h" -@@ -422,6 +423,8 @@ mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, - register_dirty_segment(reln, forknum, v); - - Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE)); -+ -+ ptrack_mark_block(reln->smgr_rnode, forknum, blocknum); - } - - /* -@@ -692,6 +695,8 @@ mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, - - if (!skipFsync && !SmgrIsTemp(reln)) - register_dirty_segment(reln, forknum, v); -+ -+ ptrack_mark_block(reln->smgr_rnode, forknum, blocknum); - } - - /* -diff --git a/src/backend/storage/smgr/ptrack.c b/src/backend/storage/smgr/ptrack.c -new file mode 100644 -index 00000000000..b4e731cf652 ---- /dev/null -+++ b/src/backend/storage/smgr/ptrack.c -@@ -0,0 +1,655 @@ -+/*------------------------------------------------------------------------- -+ * -+ * ptrack.c -+ * map for tracking updates of relation's pages -+ * -+ * IDENTIFICATION -+ * src/backend/storage/smgr/ptrack.c -+ * -+ * INTERFACE ROUTINES (PostgreSQL side) -+ * ptrackMapInit() --- allocate new shared ptrack_map -+ * ptrackMapAttach() --- attach to the existing ptrack_map -+ * assign_ptrack_map_size() --- ptrack_map_size GUC assign callback -+ * ptrack_walkdir() --- walk directory and mark all blocks of all -+ * data files in ptrack_map -+ * ptrack_mark_block() --- mark single page in ptrack_map -+ * -+ *------------------------------------------------------------------------- -+ */ -+ 
-+#include "postgres.h" -+ -+#include -+#include -+ -+#ifndef WIN32 -+#include "sys/mman.h" -+#endif -+ -+#include "miscadmin.h" -+#include "funcapi.h" -+#include "access/hash.h" -+#include "access/table.h" -+#include "access/parallel.h" -+#include "access/xlog.h" -+#include "access/xlogutils.h" -+#include "access/skey.h" -+#include "access/genam.h" -+#include "access/generic_xlog.h" -+#include "access/htup_details.h" -+#include "catalog/pg_class.h" -+#include "catalog/pg_depend.h" -+#include "catalog/pg_tablespace.h" -+#include "nodes/makefuncs.h" -+#include "port/pg_crc32c.h" -+#include "storage/bufmgr.h" -+#include "storage/copydir.h" -+#include "storage/lmgr.h" -+#include "storage/ptrack.h" -+#include "storage/smgr.h" -+#include "storage/fd.h" -+#include "storage/bufmgr.h" -+#include "storage/md.h" -+#include "storage/reinit.h" -+#include "utils/inval.h" -+#include "utils/array.h" -+#include "utils/relfilenodemap.h" -+#include "utils/builtins.h" -+#include "utils/pg_lsn.h" -+#include "utils/lsyscache.h" -+ -+PtrackMap ptrack_map = NULL; -+uint64 ptrack_map_size; -+ -+static bool -+file_exists(const char *name) -+{ -+ struct stat st; -+ -+ AssertArg(name != NULL); -+ -+ if (stat(name, &st) == 0) -+ return S_ISDIR(st.st_mode) ? false : true; -+ else if (!(errno == ENOENT || errno == ENOTDIR || errno == EACCES)) -+ ereport(ERROR, -+ (errcode_for_file_access(), -+ errmsg("could not access file \"%s\": %m", name))); -+ -+ return false; -+} -+ -+/* Delete ptrack file adn free the memory when ptrack is disabled. -+ * -+ * This is performed by postmaster at start or by checkpointer,, -+ * so that there are no concurrent delete issues. 
-+ */ -+static void -+ptrackCleanFilesAndMap(void) -+{ -+ char ptrack_path[MAXPGPATH]; -+ char ptrack_mmap_path[MAXPGPATH]; -+ char ptrack_path_tmp[MAXPGPATH]; -+ -+ sprintf(ptrack_path, "%s/%s", DataDir, PTRACK_PATH); -+ sprintf(ptrack_mmap_path, "%s/%s", DataDir, PTRACK_MMAP_PATH); -+ sprintf(ptrack_path_tmp, "%s/%s", DataDir, PTRACK_PATH_TMP); -+ -+ elog(DEBUG1, "ptrackCleanFilesAndMap"); -+ -+ if (file_exists(ptrack_path_tmp)) -+ durable_unlink(ptrack_path_tmp, LOG); -+ -+ if (file_exists(ptrack_path)) -+ durable_unlink(ptrack_path, LOG); -+ -+ if (ptrack_map != NULL) -+ { -+#ifdef WIN32 -+ if (!UnmapViewOfFile(ptrack_map)) -+#else -+ if (!munmap(ptrack_map, sizeof(ptrack_map))) -+#endif -+ elog(LOG, "could not unmap ptrack_map"); -+ -+ ptrack_map = NULL; -+ if (file_exists(ptrack_mmap_path)) -+ durable_unlink(ptrack_mmap_path, LOG); -+ } -+ -+} -+ -+/* -+ * Copy PTRACK_PATH file to special temporary file PTRACK_MMAP_PATH used for mmaping, -+ * or create new file, if there was no PTRACK_PATH file on disk. -+ * -+ * Mmap the content of PTRACK_MMAP_PATH file into memory structure 'ptrack_map' -+ */ -+void -+ptrackMapInit(void) -+{ -+ int ptrack_fd; -+ pg_crc32c crc; -+ pg_crc32c *file_crc; -+ char ptrack_path[MAXPGPATH]; -+ char ptrack_mmap_path[MAXPGPATH]; -+ struct stat stat_buf; -+ bool is_new_map = true; -+ -+ elog(DEBUG1, "ptrackMapInit"); -+ -+ /* We do it at server start, so the map must be not allocated yet. */ -+ Assert(ptrack_map == NULL); -+ -+ /*Delete ptrack_map and all related files, if ptrack was switched off. 
*/ -+ if (ptrack_map_size == 0) -+ { -+ ptrackCleanFilesAndMap(); -+ return; -+ } -+ -+ sprintf(ptrack_path, "%s/%s", DataDir, PTRACK_PATH); -+ sprintf(ptrack_mmap_path, "%s/%s", DataDir, PTRACK_MMAP_PATH); -+ -+ /* Remove old PTRACK_MMAP_PATH file, if exists */ -+ if (file_exists(ptrack_mmap_path)) -+ durable_unlink(ptrack_mmap_path, LOG); -+ -+ if (stat(ptrack_path, &stat_buf) == 0 && -+ stat_buf.st_size != ptrack_map_size) -+ { -+ elog(WARNING, "stat_buf.st_size != ptrack_map_size %zu != " UINT64_FORMAT, -+ (Size) stat_buf.st_size, ptrack_map_size); -+ durable_unlink(ptrack_path, LOG); -+ } -+ -+ /* -+ * If on-disk PTRACK_PATH file is present and has expected size, -+ * copy it to read and restore state -+ */ -+ if (stat(ptrack_path, &stat_buf) == 0) -+ { -+ copy_file(ptrack_path, ptrack_mmap_path); -+ /* flag to check checksum */ -+ is_new_map = false; -+ ptrack_fd = BasicOpenFile(ptrack_mmap_path, O_RDWR | PG_BINARY); -+ if (ptrack_fd < 0) -+ elog(ERROR, "Failed to open ptrack map file \"%s\": %m", ptrack_mmap_path); -+ } -+ else -+ { -+ /* Create new file for PTRACK_MMAP_PATH */ -+ ptrack_fd = BasicOpenFile(ptrack_mmap_path, O_RDWR | O_CREAT | PG_BINARY); -+ if (ptrack_fd < 0) -+ elog(ERROR, "Failed to open ptrack map file \"%s\": %m", ptrack_mmap_path); -+ } -+ -+#ifdef WIN32 -+ { -+ HANDLE mh = CreateFileMapping((HANDLE) _get_osfhandle(ptrack_fd), -+ NULL, -+ PAGE_READWRITE, -+ 0, -+ (DWORD) ptrack_map_size, -+ NULL); -+ if (mh == NULL) -+ elog(ERROR, "Failed to create file mapping: %m"); -+ -+ ptrack_map = (PtrackMap) MapViewOfFile(mh, FILE_MAP_ALL_ACCESS, 0, 0, 0); -+ if (ptrack_map == NULL) -+ { -+ CloseHandle(mh); -+ elog(ERROR, "Failed to mmap ptrack file: %m"); -+ } -+ } -+#else -+ if (ftruncate(ptrack_fd, ptrack_map_size) < 0) -+ elog(ERROR, "Failed to truncate ptrack file: %m"); -+ -+ ptrack_map = (PtrackMap) mmap(NULL, ptrack_map_size, -+ PROT_READ | PROT_WRITE, MAP_SHARED, -+ ptrack_fd, 0); -+ if (ptrack_map == MAP_FAILED) -+ elog(ERROR, "Failed 
to mmap ptrack file: %m"); -+#endif -+ -+ if (!is_new_map) -+ { -+ /* Check CRC */ -+ INIT_CRC32C(crc); -+ COMP_CRC32C(crc, (char *) ptrack_map, PtrackCrcOffset); -+ FIN_CRC32C(crc); -+ -+ file_crc = (pg_crc32c *) ((char *) ptrack_map + PtrackCrcOffset); -+ -+ elog(DEBUG1, "ptrackFileRead, crc %u, file_crc %u init_lsn %X/%X", -+ crc, *file_crc, (uint32) (ptrack_map->init_lsn >> 32), (uint32) ptrack_map->init_lsn); -+ -+ /* TODO Handle this error. Probably we can just recreate the file */ -+ if (!EQ_CRC32C(*file_crc, crc)) -+ { -+ pg_atomic_write_u64((pg_atomic_uint64 *) &(ptrack_map->init_lsn), InvalidXLogRecPtr); -+ elog(ERROR, "incorrect checksum of file \"%s\"", ptrack_path); -+ } -+ } -+} -+ -+/* -+ * Map must be already initialized by postmaster at start. -+ * mmap working copy of ptrack_map. -+ */ -+void -+ptrackMapAttach(void) -+{ -+ char ptrack_mmap_path[MAXPGPATH]; -+ int ptrack_fd; -+ struct stat stat_buf; -+ -+ elog(DEBUG1, "ptrackMapAttach"); -+ -+ /* We do it at process start, so the map must be not allocated yet. */ -+ Assert(ptrack_map == NULL); -+ -+ if (ptrack_map_size == 0) -+ return; -+ -+ sprintf(ptrack_mmap_path, "%s/%s", DataDir, PTRACK_MMAP_PATH); -+ if (!file_exists(ptrack_mmap_path)) -+ { -+ elog(WARNING, "ptrackMapAttach(). 
'%s' file doesn't exist ", ptrack_mmap_path); -+ return; -+ } -+ -+ if (stat(ptrack_mmap_path, &stat_buf) == 0 && -+ stat_buf.st_size != ptrack_map_size) -+ elog(ERROR, "ptrack_map_size doesn't match size of the file \"%s\"", ptrack_mmap_path); -+ -+ -+ ptrack_fd = BasicOpenFile(ptrack_mmap_path, O_RDWR | PG_BINARY); -+ if (ptrack_fd < 0) -+ elog(ERROR, "Failed to open ptrack map file \"%s\": %m", ptrack_mmap_path); -+ -+ elog(DEBUG1, "ptrackMapAttach before mmap"); -+#ifdef WIN32 -+ { -+ HANDLE mh = CreateFileMapping((HANDLE) _get_osfhandle(ptrack_fd), -+ NULL, -+ PAGE_READWRITE, -+ 0, -+ (DWORD) ptrack_map_size, -+ NULL); -+ if (mh == NULL) -+ elog(ERROR, "Failed to create file mapping: %m"); -+ -+ ptrack_map = (PtrackMap) MapViewOfFile(mh, FILE_MAP_ALL_ACCESS, 0, 0, 0); -+ if (ptrack_map == NULL) -+ { -+ CloseHandle(mh); -+ elog(ERROR, "Failed to mmap ptrack file: %m"); -+ } -+ } -+#else -+ ptrack_map = (PtrackMap) mmap(NULL, ptrack_map_size, -+ PROT_READ | PROT_WRITE, MAP_SHARED, -+ ptrack_fd, 0); -+ if (ptrack_map == MAP_FAILED) -+ elog(ERROR, "Failed to mmap ptrack file: %m"); -+#endif -+} -+ -+/* -+ * Write content of ptrack_map to file. 
-+ */ -+void -+ptrackCheckpoint(void) -+{ -+ int ptrack_tmp_fd; -+ pg_crc32c crc; -+ char ptrack_path[MAXPGPATH]; -+ char ptrack_path_tmp[MAXPGPATH]; -+ XLogRecPtr init_lsn; -+ XLogRecPtr buf[PTRACK_BUF_SIZE]; -+ uint32 crc_buf[2]; -+ struct stat stat_buf; -+ int i = 0; -+ int j = 0; -+ -+ elog(DEBUG1, "ptrackCheckpoint"); -+ -+ /* Delete ptrack_map and all related files, if ptrack was switched off */ -+ if (ptrack_map_size == 0) -+ { -+ ptrackCleanFilesAndMap(); -+ return; -+ } -+ else if (ptrack_map == NULL) -+ elog(ERROR, "ptrack map is not loaded"); -+ -+ sprintf(ptrack_path_tmp, "%s/%s", DataDir, PTRACK_PATH_TMP); -+ sprintf(ptrack_path, "%s/%s", DataDir, PTRACK_PATH); -+ -+ elog(DEBUG1, "ptrackCheckpoint() start"); -+ -+ /* map content is protected with CRC */ -+ INIT_CRC32C(crc); -+ -+ ptrack_tmp_fd = BasicOpenFile(ptrack_path_tmp, -+ O_CREAT | O_TRUNC | O_WRONLY | PG_BINARY); -+ -+ if (ptrack_tmp_fd < 0) -+ ereport(ERROR, -+ (errcode_for_file_access(), -+ errmsg("could not create file \"%s\": %m", ptrack_path_tmp))); -+ -+ /* -+ * PtrackMapHdr only contains one XLogRecPtr (or uint64) value. -+ * Copy it to the file. -+ * -+ * TODO: it seems that we do not need atomic read here and could -+ * write the entire PtrackMap instead. 
-+ */ -+ init_lsn = pg_atomic_read_u64((pg_atomic_uint64 *) &(ptrack_map->init_lsn)); -+ -+ /* Set init_lsn during checkpoint if it is not set yet */ -+ if (init_lsn == InvalidXLogRecPtr) -+ { -+ XLogRecPtr new_init_lsn; -+ -+ if (RecoveryInProgress()) -+ new_init_lsn = GetXLogReplayRecPtr(NULL); -+ else -+ new_init_lsn = GetXLogInsertRecPtr(); -+ -+ pg_atomic_write_u64((pg_atomic_uint64 *) &(ptrack_map->init_lsn), new_init_lsn); -+ init_lsn = new_init_lsn; -+ } -+ -+ COMP_CRC32C(crc, (char *) &init_lsn, sizeof(init_lsn)); -+ if (write(ptrack_tmp_fd, &init_lsn, sizeof(init_lsn)) != sizeof(init_lsn)) -+ { -+ /* if write didn't set errno, assume problem is no disk space */ -+ if (errno == 0) -+ errno = ENOSPC; -+ ereport(ERROR, -+ (errcode_for_file_access(), -+ errmsg("could not write file \"%s\": %m", ptrack_path_tmp))); -+ } -+ -+ /* -+ * Iterate over ptrack map actual content and sync it to file. -+ * It's essential to read each element atomically to avoid partial reads, -+ * since map can be updated concurrently without any lock. 
-+ */ -+ while (i < PtrackContentNblocks) -+ { -+ buf[j] = pg_atomic_read_u64(&PtrackContent(ptrack_map)[i]); -+ -+ i++; -+ j++; -+ -+ if (j == PTRACK_BUF_SIZE) -+ { -+ int writesz = sizeof(buf); -+ -+ COMP_CRC32C(crc, (char *) buf, writesz); -+ if (write(ptrack_tmp_fd, buf, writesz) != writesz) -+ { -+ /* if write didn't set errno, assume problem is no disk space */ -+ if (errno == 0) -+ errno = ENOSPC; -+ ereport(ERROR, -+ (errcode_for_file_access(), -+ errmsg("could not write file \"%s\": %m", ptrack_path_tmp))); -+ } -+ -+ elog(DEBUG1, "i %d, j %d, writesz %d PtrackContentNblocks " UINT64_FORMAT, -+ i, j, writesz, (uint64) PtrackContentNblocks); -+ -+ j = 0; -+ } -+ } -+ -+ /* Write if anythig left */ -+ // TODO: check this i -+ if ((i + 1) % PTRACK_BUF_SIZE != 0) -+ { -+ int writesz = sizeof(XLogRecPtr)*(j); -+ -+ COMP_CRC32C(crc, (char *) buf, writesz); -+ if (write(ptrack_tmp_fd, buf, writesz) != writesz) -+ { -+ /* if write didn't set errno, assume problem is no disk space */ -+ if (errno == 0) -+ errno = ENOSPC; -+ ereport(ERROR, -+ (errcode_for_file_access(), -+ errmsg("could not write file \"%s\": %m", ptrack_path_tmp))); -+ } -+ -+ elog(DEBUG1, "Final i %d, j %d, writesz %d PtrackContentNblocks " UINT64_FORMAT, -+ i, j, writesz, (uint64) PtrackContentNblocks); -+ -+ } -+ -+ /* add 32bit padding before checksum */ -+ crc_buf[0] = 0; -+ COMP_CRC32C(crc, (char *) crc_buf, sizeof(uint32)); -+ FIN_CRC32C(crc); -+ -+ crc_buf[1] = crc; -+ -+ if (write(ptrack_tmp_fd, crc_buf, sizeof(crc_buf)) != sizeof(crc_buf)) -+ { -+ /* if write didn't set errno, assume problem is no disk space */ -+ if (errno == 0) -+ errno = ENOSPC; -+ ereport(ERROR, -+ (errcode_for_file_access(), -+ errmsg("could not write file \"%s\": %m", ptrack_path_tmp))); -+ } -+ -+ if (pg_fsync(ptrack_tmp_fd) != 0) -+ ereport(ERROR, -+ (errcode_for_file_access(), -+ errmsg("could not fsync file \"%s\": %m", ptrack_path_tmp))); -+ -+ if (close(ptrack_tmp_fd) != 0) -+ ereport(ERROR, -+ 
(errcode_for_file_access(), -+ errmsg("could not close file \"%s\": %m", ptrack_path_tmp))); -+ -+ /* And finally replace old file with the new one */ -+ durable_rename(ptrack_path_tmp, ptrack_path, LOG); -+ -+ /* sanity check */ -+ if (stat(ptrack_path, &stat_buf) == 0 && -+ stat_buf.st_size != ptrack_map_size) -+ { -+ elog(ERROR, "ptrackCheckpoint(). stat_buf.st_size != ptrack_map_size %zu != " UINT64_FORMAT, -+ (Size) stat_buf.st_size, ptrack_map_size); -+ } -+ elog(DEBUG1, "ptrackCheckpoint() completed"); -+} -+ -+void -+assign_ptrack_map_size(int newval, void *extra) -+{ -+ elog(DEBUG1, "assign_ptrack_map_size, MyProc %d newval %d ptrack_map_size " UINT64_FORMAT, -+ MyProcPid, newval, ptrack_map_size); -+ -+ if (newval != 0 && !XLogIsNeeded()) -+ ereport(ERROR, -+ (errmsg("уou cannot use ptrack if wal_level is minimal "), -+ errdetail("Either set wal_level to \"replica\" or higher, or turn off ptrack with \"ptrack_map_size=0\""))); -+ -+ if (DataDir != NULL && !IsBootstrapProcessingMode() && -+ !InitializingParallelWorker) -+ { -+ /* Always assign ptrack_map_size */ -+ ptrack_map_size = newval*1024*1024; -+ -+ /* Init map on postmaster start */ -+ if (!IsUnderPostmaster) -+ { -+ elog(DEBUG1, "assign_ptrack_map_size, MyProc %d newval %d ptrack_map_size " UINT64_FORMAT, -+ MyProcPid, newval, ptrack_map_size); -+ -+ if (ptrack_map == NULL) -+ { -+ elog(DEBUG1, "ptrack_map_size() do ptrackMapInit()"); -+ ptrackMapInit(); -+ } -+ } -+ else -+ { -+ elog(DEBUG1, "ptrack_map_size(). do ptrackMapAttach() MyProc %d ptrack_map_size " UINT64_FORMAT, -+ MyProcPid, ptrack_map_size); -+ ptrackMapAttach(); -+ } -+ } -+} -+ -+ -+/* -+ * Mark all blocks of the file in ptrack_map. -+ * For use in functions that copy directories bypassing buffer manager. 
-+ */ -+static void -+ptrack_mark_file(Oid dbOid, Oid tablespaceOid, -+ const char *filepath, const char *filename) -+{ -+ RelFileNodeBackend rnode; -+ ForkNumber forknum; -+ BlockNumber blkno, -+ nblocks = 0; -+ struct stat stat_buf; -+ int oidchars; -+ char oidbuf[OIDCHARS + 1]; -+ -+ /* do not track temporary relations */ -+ if (looks_like_temp_rel_name(filename)) -+ return; -+ -+ /* mark of non-temporary relation */ -+ rnode.backend = InvalidBackendId; -+ -+ rnode.node.dbNode = dbOid; -+ rnode.node.spcNode = tablespaceOid; -+ -+ if (!parse_filename_for_nontemp_relation(filename, &oidchars, &forknum)) -+ return; -+ -+ -+ memcpy(oidbuf, filename, oidchars); -+ oidbuf[oidchars] = '\0'; -+ rnode.node.relNode = atooid(oidbuf); -+ -+ /* compute number of blocks based on file size */ -+ if (stat(filepath, &stat_buf) == 0) -+ nblocks = stat_buf.st_size / BLCKSZ; -+ -+ elog(DEBUG1, "ptrack_mark_file %s, nblocks %u rnode db %u spc %u rel %u, forknum %d", -+ filepath, nblocks, rnode.node.dbNode, rnode.node.spcNode, rnode.node.relNode, forknum); -+ for (blkno = 0; blkno < nblocks; blkno++) -+ ptrack_mark_block(rnode, forknum, blkno); -+} -+ -+/* -+ * Mark all files in the given directory in ptrack_map. -+ * For use in functions that copy directories bypassing buffer manager. -+ * -+ * TODO do we need to add process_symlinks? 
-+ */ -+void -+ptrack_walkdir(const char *path, Oid dbOid, Oid tablespaceOid) -+{ -+ DIR *dir; -+ struct dirent *de; -+ -+ /* Do not walk during bootstrap and if ptrack is disabled */ -+ if (ptrack_map_size == 0 -+ || DataDir == NULL -+ || IsBootstrapProcessingMode() -+ || InitializingParallelWorker) -+ return; -+ -+ dir = AllocateDir(path); -+ -+ while ((de = ReadDirExtended(dir, path, LOG)) != NULL) -+ { -+ char subpath[MAXPGPATH * 2]; -+ struct stat fst; -+ int sret; -+ -+ CHECK_FOR_INTERRUPTS(); -+ -+ if (strcmp(de->d_name, ".") == 0 || -+ strcmp(de->d_name, "..") == 0) -+ continue; -+ -+ snprintf(subpath, sizeof(subpath), "%s/%s", path, de->d_name); -+ -+ sret = lstat(subpath, &fst); -+ -+ if (sret < 0) -+ { -+ ereport(LOG, -+ (errcode_for_file_access(), -+ errmsg("could not stat file \"%s\": %m", subpath))); -+ continue; -+ } -+ -+ if (S_ISREG(fst.st_mode)) -+ ptrack_mark_file(dbOid, tablespaceOid, subpath, de->d_name); -+ else if (S_ISDIR(fst.st_mode)) -+ ptrack_walkdir(subpath, false, LOG); -+ } -+ -+ FreeDir(dir); /* we ignore any error here */ -+} -+ -+/* -+ * Mark modified block in ptrack_map. 
-+ */ -+void ptrack_mark_block(RelFileNodeBackend smgr_rnode, -+ ForkNumber forknum, BlockNumber blocknum) -+{ -+ size_t hash; -+ XLogRecPtr new_lsn; -+ XLogRecPtr old_lsn; -+ XLogRecPtr old_init_lsn; -+ PtBlockId bid; -+ -+ if (ptrack_map_size != 0 && (ptrack_map != NULL) && -+ smgr_rnode.backend == InvalidBackendId) /* do not track temporary relations */ -+ { -+ bid.relnode = smgr_rnode.node; -+ bid.forknum = forknum; -+ bid.blocknum = blocknum; -+ hash = BID_HASH_FUNC(bid); -+ -+ if (RecoveryInProgress()) -+ new_lsn = GetXLogReplayRecPtr(NULL); -+ else -+ new_lsn = GetXLogInsertRecPtr(); -+ -+ old_lsn = pg_atomic_read_u64(&PtrackContent(ptrack_map)[hash]); -+ -+ /* Atomically assign new init LSN value */ -+ old_init_lsn = pg_atomic_read_u64((pg_atomic_uint64 *) &(ptrack_map->init_lsn)); -+ -+ if (old_init_lsn == InvalidXLogRecPtr) -+ { -+ elog(DEBUG1, "ptrack_mark_block() init_lsn " UINT64_FORMAT " <- " UINT64_FORMAT, old_init_lsn , new_lsn); -+ -+ while (old_init_lsn < new_lsn && -+ !pg_atomic_compare_exchange_u64((pg_atomic_uint64 *) &(ptrack_map->init_lsn), &old_init_lsn, new_lsn)); -+ } -+ -+ elog(DEBUG1, "ptrack_mark_block() map[%zu]=" UINT64_FORMAT " <- " UINT64_FORMAT, hash, old_lsn , new_lsn); -+ -+ /* Atomically assign new LSN value */ -+ while (old_lsn < new_lsn && -+ !pg_atomic_compare_exchange_u64(&PtrackContent(ptrack_map)[hash], &old_lsn, new_lsn)); -+ elog(DEBUG1, "ptrack_mark_block() map[%zu]=" UINT64_FORMAT, hash, pg_atomic_read_u64(&PtrackContent(ptrack_map)[hash])); -+ } -+} -diff --git a/src/backend/storage/sync/sync.c b/src/backend/storage/sync/sync.c -index 705f229b27f..87c5d4f965d 100644 ---- a/src/backend/storage/sync/sync.c -+++ b/src/backend/storage/sync/sync.c -@@ -28,6 +28,7 @@ - #include "storage/bufmgr.h" - #include "storage/ipc.h" - #include "storage/md.h" -+#include "storage/ptrack.h" - #include "utils/hsearch.h" - #include "utils/memutils.h" - #include "utils/inval.h" -@@ -418,6 +419,13 @@ ProcessSyncRequests(void) - 
CheckpointStats.ckpt_longest_sync = longest; - CheckpointStats.ckpt_agg_sync_time = total_elapsed; - -+ /* -+ * Flush ptrack file -+ * TODO find proper place for this call, -+ * so that we're sure that all needed changes are present in ptrack file -+ */ -+ ptrackCheckpoint(); -+ - /* Flag successful completion of ProcessSyncRequests */ - sync_in_progress = false; - } -diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c -index dc8f910ea46..74bda8275cd 100644 ---- a/src/backend/utils/misc/guc.c -+++ b/src/backend/utils/misc/guc.c -@@ -78,6 +78,8 @@ - #include "storage/pg_shmem.h" - #include "storage/proc.h" - #include "storage/predicate.h" -+#include "storage/ptrack.h" -+#include "storage/standby.h" - #include "tcop/tcopprot.h" - #include "tsearch/ts_cache.h" - #include "utils/builtins.h" -@@ -581,6 +583,7 @@ static char *recovery_target_xid_string; - static char *recovery_target_name_string; - static char *recovery_target_lsn_string; - -+static int ptrack_map_size_tmp; - - /* should be static, but commands/variable.c needs to get at this */ - char *role_string; -@@ -1961,6 +1964,16 @@ static struct config_bool ConfigureNamesBool[] = - - static struct config_int ConfigureNamesInt[] = - { -+ { -+ {"ptrack_map_size", PGC_POSTMASTER, RESOURCES_DISK, -+ gettext_noop("Size of ptrack map used for incremental backup: 0 disabled."), -+ NULL, -+ GUC_UNIT_MB -+ }, -+ &ptrack_map_size_tmp, -+ 0, 0, 1024, -+ NULL, assign_ptrack_map_size, NULL -+ }, - { - {"archive_timeout", PGC_SIGHUP, WAL_ARCHIVING, - gettext_noop("Forces a switch to the next WAL file if a " -diff --git a/src/bin/pg_rewind/filemap.c b/src/bin/pg_rewind/filemap.c -index 601f7e9690e..f8bb8ff521f 100644 ---- a/src/bin/pg_rewind/filemap.c -+++ b/src/bin/pg_rewind/filemap.c -@@ -103,6 +103,9 @@ static const char *excludeFiles[] = - "postmaster.pid", - "postmaster.opts", - -+ "ptrack.map.mmap", -+ "ptrack.map", -+ "ptrack.map.tmp", - /* end of list */ - NULL - }; -diff --git 
a/src/include/storage/ptrack.h b/src/include/storage/ptrack.h -new file mode 100644 -index 00000000000..67c66913479 ---- /dev/null -+++ b/src/include/storage/ptrack.h -@@ -0,0 +1,129 @@ -+/*------------------------------------------------------------------------- -+ * -+ * ptrack.h -+ * header for ptrack map for tracking updates of relation's pages -+ * -+ * -+ * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group -+ * Portions Copyright (c) 1994, Regents of the University of California -+ * -+ * src/include/storage/ptrack.h -+ * -+ *------------------------------------------------------------------------- -+ */ -+#ifndef PTRACK_H -+#define PTRACK_H -+ -+#include "access/xlogdefs.h" -+#include "storage/block.h" -+#include "storage/buf.h" -+#include "storage/relfilenode.h" -+#include "storage/smgr.h" -+#include "utils/relcache.h" -+ -+/* Ptrack version as a string */ -+#define PTRACK_VERSION "2.0" -+/* Ptrack version as a number */ -+#define PTRACK_VERSION_NUM 200 -+ -+/* Working copy of ptrack.map */ -+#define PTRACK_MMAP_PATH "global/ptrack.map.mmap" -+/* Persistent copy of ptrack.map to restore after crash */ -+#define PTRACK_PATH "global/ptrack.map" -+/* Used for atomical crash-safe update of ptrack.map */ -+#define PTRACK_PATH_TMP "global/ptrack.map.tmp" -+ -+#define PTRACK_BUF_SIZE 1000 -+ -+/* header of ptrack map */ -+typedef struct PtrackMapHdr { -+ /* -+ * Default value. Used for the map reset. -+ * LSN of the moment, when map was last enabled -+ */ -+ XLogRecPtr init_lsn; -+ -+ /* Followed by the actual map of LSNs */ -+ XLogRecPtr entries[FLEXIBLE_ARRAY_MEMBER]; -+} PtrackMapHdr; -+/* At the end of the map stored CRC of type pg_crc32c */ -+ -+typedef PtrackMapHdr *PtrackMap; -+ -+/* -+ * Structure identifying block on the disk. -+ */ -+typedef struct PtBlockId -+{ -+ RelFileNode relnode; -+ ForkNumber forknum; -+ BlockNumber blocknum; -+} PtBlockId; -+ -+/* -+ * Context for pg_ptrack_get_pagemapset set returning function. 
-+ */ -+typedef struct PtScanCtx -+{ -+ XLogRecPtr lsn; -+ PtBlockId bid; -+ uint32 relsize; -+ char *relpath; -+ List *filelist; -+} PtScanCtx; -+ -+/* -+ * List item type for ptrack data files list. -+ */ -+typedef struct PtrackFileList_i { -+ RelFileNode relnode; -+ ForkNumber forknum; -+ int segno; -+ char *path; -+} PtrackFileList_i; -+ -+// TODO: check MAXALIGN usage below -+/* -+ * Pointer to the actual map (LSN array) start -+ * -+ * Cast ptrack_map to char* to shift on exactly MAXALIGN(sizeof(PtrackMapHdr)) bytes -+ */ -+#define PtrackContent(ptrack_map) \ -+ ((pg_atomic_uint64 *) (((char *) (ptrack_map)) + MAXALIGN(sizeof(PtrackMapHdr)))) -+ -+/* Actual size of ptrack_map (LSN array) excluding header and crc */ -+#define PtrackContentSize \ -+ (ptrack_map_size - MAXALIGN(sizeof(PtrackMapHdr)) - MAXALIGN(sizeof(pg_crc32c))) -+ -+/* Number of elements in ptrack_map (LSN array) */ -+#define PtrackContentNblocks \ -+ (PtrackContentSize / sizeof(XLogRecPtr)) -+ -+#define PtrackCrcOffset (ptrack_map_size - sizeof(pg_crc32c)) -+ -+/* Map block address 'bid' to map slot */ -+#define BID_HASH_FUNC(bid) \ -+ (size_t)(DatumGetUInt64(hash_any_extended((unsigned char *)&bid, sizeof(bid), 0)) % PtrackContentNblocks) -+ -+/* -+ * Per process pointer to shared ptrack_map -+ */ -+extern PGDLLIMPORT PtrackMap ptrack_map; -+ -+/* -+ * Size of ptrack map in bytes -+ * TODO: to be protected by PtrackResizeLock? 
-+ */ -+extern PGDLLIMPORT uint64 ptrack_map_size; -+ -+extern void ptrackCheckpoint(void); -+extern void ptrackMapInit(void); -+extern void ptrackMapAttach(void); -+ -+extern void assign_ptrack_map_size(int newval, void *extra); -+ -+extern void ptrack_walkdir(const char *path, Oid dbOid, Oid tablespaceOid); -+extern void ptrack_mark_block(RelFileNodeBackend smgr_rnode, -+ ForkNumber forkno, BlockNumber blkno); -+ -+#endif /* PTRACK_H */ diff --git a/patches/turn-off-hint-bits.diff b/patches/turn-off-hint-bits.diff new file mode 100644 index 0000000..1dd5d25 --- /dev/null +++ b/patches/turn-off-hint-bits.diff @@ -0,0 +1,16 @@ +diff --git a/src/backend/access/heap/heapam_visibility.c b/src/backend/access/heap/heapam_visibility.c +index 537e681b236..bd9f010f2ea 100644 +--- a/src/backend/access/heap/heapam_visibility.c ++++ b/src/backend/access/heap/heapam_visibility.c +@@ -113,6 +113,11 @@ static inline void + SetHintBits(HeapTupleHeader tuple, Buffer buffer, + uint16 infomask, TransactionId xid) + { ++ /* ++ * Turn off hint bits to test pg_probackup with PG_PROBACKUP_PARANOIA=ON, ++ * i.e. doing per block checksums comparison. ++ */ ++ return; + if (TransactionIdIsValid(xid)) + { + /* NB: xid must be known committed here! */ diff --git a/ptrack--2.0--2.1.sql b/ptrack--2.0--2.1.sql new file mode 100644 index 0000000..80de2be --- /dev/null +++ b/ptrack--2.0--2.1.sql @@ -0,0 +1,25 @@ +/* ptrack/ptrack--2.0--2.1.sql */ + +-- Complain if script is sourced in psql, rather than via ALTER EXTENSION +\echo Use "ALTER EXTENSION ptrack UPDATE;" to load this file. 
\quit + +DROP FUNCTION ptrack_version(); +DROP FUNCTION pg_ptrack_get_pagemapset(pg_lsn); +DROP FUNCTION pg_ptrack_control_lsn(); +DROP FUNCTION pg_ptrack_get_block(oid, oid, oid, int8); + +CREATE FUNCTION ptrack_version() +RETURNS text +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE; + +CREATE FUNCTION ptrack_init_lsn() +RETURNS pg_lsn +AS 'MODULE_PATHNAME' +LANGUAGE C VOLATILE; + +CREATE FUNCTION ptrack_get_pagemapset(start_lsn pg_lsn) +RETURNS TABLE (path text, + pagemap bytea) +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT VOLATILE; diff --git a/ptrack--2.1--2.2.sql b/ptrack--2.1--2.2.sql new file mode 100644 index 0000000..da897b6 --- /dev/null +++ b/ptrack--2.1--2.2.sql @@ -0,0 +1,35 @@ +/* ptrack/ptrack--2.1--2.2.sql */ + +-- Complain if script is sourced in psql, rather than via ALTER EXTENSION +\echo Use "ALTER EXTENSION ptrack UPDATE;" to load this file. \quit + +DROP FUNCTION ptrack_get_pagemapset(start_lsn pg_lsn); +CREATE FUNCTION ptrack_get_pagemapset(start_lsn pg_lsn) +RETURNS TABLE (path text, + pagecount bigint, + pagemap bytea) +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT VOLATILE; + +CREATE FUNCTION ptrack_get_change_stat(start_lsn pg_lsn) + RETURNS TABLE ( + files bigint, + pages numeric, + "size, MB" numeric + ) AS +$func$ +DECLARE +block_size bigint; +BEGIN + block_size := (SELECT setting FROM pg_settings WHERE name = 'block_size'); + + RETURN QUERY + SELECT changed_files, + changed_pages, + block_size * changed_pages / (1024.0 * 1024) + FROM + (SELECT count(path) AS changed_files, + sum(pagecount) AS changed_pages + FROM ptrack_get_pagemapset(start_lsn)) s; +END +$func$ LANGUAGE plpgsql; diff --git a/ptrack.sql b/ptrack--2.1.sql similarity index 51% rename from ptrack.sql rename to ptrack--2.1.sql index 5324481..c963964 100644 --- a/ptrack.sql +++ b/ptrack--2.1.sql @@ -1,26 +1,20 @@ +/* ptrack/ptrack--2.1.sql */ + -- Complain if script is sourced in psql, rather than via CREATE EXTENSION \echo Use "CREATE EXTENSION ptrack" to load this file. 
\quit CREATE FUNCTION ptrack_version() RETURNS text AS 'MODULE_PATHNAME' -LANGUAGE C STRICT; +LANGUAGE C IMMUTABLE; -CREATE FUNCTION pg_ptrack_control_lsn() +CREATE FUNCTION ptrack_init_lsn() RETURNS pg_lsn AS 'MODULE_PATHNAME' -LANGUAGE C STRICT; +LANGUAGE C VOLATILE; -CREATE FUNCTION pg_ptrack_get_pagemapset(start_lsn pg_lsn) +CREATE FUNCTION ptrack_get_pagemapset(start_lsn pg_lsn) RETURNS TABLE (path text, pagemap bytea) AS 'MODULE_PATHNAME' LANGUAGE C STRICT VOLATILE; - -CREATE FUNCTION pg_ptrack_get_block(tablespace_oid oid, - db_oid oid, - relfilenode oid, - blockno int8) -RETURNS bytea -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT; diff --git a/ptrack--2.2--2.3.sql b/ptrack--2.2--2.3.sql new file mode 100644 index 0000000..6c5f574 --- /dev/null +++ b/ptrack--2.2--2.3.sql @@ -0,0 +1,5 @@ +/* ptrack/ptrack--2.2--2.3.sql */ + +-- Complain if script is sourced in psql, rather than via ALTER EXTENSION +\echo Use "ALTER EXTENSION ptrack UPDATE;" to load this file. \quit + diff --git a/ptrack--2.3--2.4.sql b/ptrack--2.3--2.4.sql new file mode 100644 index 0000000..780bba5 --- /dev/null +++ b/ptrack--2.3--2.4.sql @@ -0,0 +1,5 @@ +/* ptrack/ptrack--2.3--2.4.sql */ + +-- Complain if script is sourced in psql, rather than via ALTER EXTENSION +\echo Use "ALTER EXTENSION ptrack UPDATE;" to load this file. 
\quit + diff --git a/ptrack.c b/ptrack.c index 9f1a32e..e2f3627 100644 --- a/ptrack.c +++ b/ptrack.c @@ -1,30 +1,25 @@ /* * ptrack.c - * Public API for in-core ptrack engine + * Block level incremental backup engine * - * Copyright (c) 2019-2020, Postgres Professional + * Copyright (c) 2019-2022, Postgres Professional * * IDENTIFICATION - * contrib/ptrack/ptrack.c - */ - -/* - * ############################################################# - * # _____ _______ _____ _____ _ __ ___ ___ # - * # | __ \__ __| __ \ /\ / ____| |/ / |__ \ / _ \ # - * # | |__) | | | | |__) | / \ | | | ' / ) || | | | # - * # | ___/ | | | _ / / /\ \| | | < / / | | | | # - * # | | | | | | \ \ / ____ \ |____| . \ / /_ | |_| | # - * # |_| |_| |_| \_\/_/ \_\_____|_|\_\ |____(_)___/ # - * ############################################################# + * ptrack/ptrack.c * - * Currently ptrack 2.0 has following public API methods: + * INTERFACE ROUTINES (PostgreSQL side) + * ptrackMapInit() --- allocate new shared ptrack_map + * assign_ptrack_map_size() --- ptrack_map_size GUC assign callback + * ptrack_walkdir() --- walk directory and mark all blocks of all + * data files in ptrack_map + * ptrack_mark_block() --- mark single page in ptrack_map * - * # ptrack_version --- returns ptrack version string (2.0 currently). - * # pg_ptrack_get_pagemapset('LSN') --- returns a set of changed data files with + * Currently ptrack has following public API methods: + * + * # ptrack_version --- returns ptrack version string (2.4 currently). + * # ptrack_get_pagemapset('LSN') --- returns a set of changed data files with * bitmaps of changed blocks since specified LSN. - * # pg_ptrack_control_lsn --- returns LSN of the last ptrack map initialization. - * # pg_ptrack_get_block --- returns a spicific block of relation. + * # ptrack_init_lsn --- returns LSN of the last ptrack map initialization. 
* */ @@ -33,26 +28,65 @@ #include #include +#if PG_VERSION_NUM < 120000 +#include "access/htup_details.h" +#endif +#include "catalog/pg_tablespace.h" +#include "catalog/pg_type.h" #include "funcapi.h" #include "miscadmin.h" -#include "access/hash.h" -#include "access/skey.h" -#include "catalog/pg_type.h" -#include "catalog/pg_tablespace.h" +#include "nodes/pg_list.h" +#include "port/pg_crc32c.h" +#include "storage/copydir.h" +#include "storage/ipc.h" #include "storage/lmgr.h" -#include "storage/ptrack.h" +#if PG_VERSION_NUM >= 120000 +#include "storage/md.h" +#endif +#include "storage/smgr.h" #include "storage/reinit.h" #include "utils/builtins.h" +#include "utils/guc.h" #include "utils/pg_lsn.h" -#include "nodes/pg_list.h" + +#include "datapagemap.h" +#include "ptrack.h" +#include "engine.h" PG_MODULE_MAGIC; -void _PG_init(void); -void _PG_fini(void); +PtrackMap ptrack_map = NULL; +uint64 ptrack_map_size = 0; +int ptrack_map_size_tmp; + +static shmem_startup_hook_type prev_shmem_startup_hook = NULL; +static copydir_hook_type prev_copydir_hook = NULL; +static mdwrite_hook_type prev_mdwrite_hook = NULL; +static mdextend_hook_type prev_mdextend_hook = NULL; +static ProcessSyncRequests_hook_type prev_ProcessSyncRequests_hook = NULL; +#if PG_VERSION_NUM >= 170000 +static backup_checkpoint_request_hook_type prev_backup_checkpoint_request_hook = NULL; +#endif + +void _PG_init(void); + +static void ptrack_shmem_startup_hook(void); +static void ptrack_copydir_hook(const char *path); +static void ptrack_mdwrite_hook(RelFileNodeBackend smgr_rnode, + ForkNumber forkno, BlockNumber blkno); +static void ptrack_mdextend_hook(RelFileNodeBackend smgr_rnode, + ForkNumber forkno, BlockNumber blkno); +static void ptrack_ProcessSyncRequests_hook(void); +#if PG_VERSION_NUM >= 170000 +static void ptrack_backup_checkpoint_request_hook(void); +#endif static void ptrack_gather_filelist(List **filelist, char *path, Oid spcOid, Oid dbOid); -static int ptrack_filelist_getnext(PtScanCtx *ctx); 
+static int ptrack_filelist_getnext(PtScanCtx * ctx); +#if PG_VERSION_NUM >= 150000 +static shmem_request_hook_type prev_shmem_request_hook = NULL; +static void ptrack_shmem_request(void); +#endif /* * Module load callback @@ -60,85 +94,207 @@ static int ptrack_filelist_getnext(PtScanCtx *ctx); void _PG_init(void) { + if (!process_shared_preload_libraries_in_progress) + elog(ERROR, "ptrack module must be initialized by Postmaster. " + "Put the following line to configuration file: " + "shared_preload_libraries='ptrack'"); + + /* + * Define (or redefine) custom GUC variables. + * + * XXX: for some reason assign_ptrack_map_size is called twice during the + * postmaster boot! First, it is always called with bootValue, so we use + * -1 as default value and no-op here. Next, it is called with the actual + * value from config. + */ + DefineCustomIntVariable("ptrack.map_size", + "Sets the size of ptrack map in MB used for incremental backup (0 disabled).", + NULL, + &ptrack_map_size_tmp, + 0, +#if SIZEOF_SIZE_T == 8 + 0, 32 * 1024, /* limit to 32 GB */ +#else + 0, 256, /* limit to 256 MB */ +#endif + PGC_POSTMASTER, + GUC_UNIT_MB, + NULL, + assign_ptrack_map_size, + NULL); + + /* Request server shared memory */ + if (ptrack_map_size != 0) + { +#if PG_VERSION_NUM >= 150000 + prev_shmem_request_hook = shmem_request_hook; + shmem_request_hook = ptrack_shmem_request; +#else + RequestAddinShmemSpace(PtrackActualSize); +#endif + } + else + ptrackCleanFiles(); + + /* Install hooks */ + prev_shmem_startup_hook = shmem_startup_hook; + shmem_startup_hook = ptrack_shmem_startup_hook; + prev_copydir_hook = copydir_hook; + copydir_hook = ptrack_copydir_hook; + prev_mdwrite_hook = mdwrite_hook; + mdwrite_hook = ptrack_mdwrite_hook; + prev_mdextend_hook = mdextend_hook; + mdextend_hook = ptrack_mdextend_hook; + prev_ProcessSyncRequests_hook = ProcessSyncRequests_hook; + ProcessSyncRequests_hook = ptrack_ProcessSyncRequests_hook; +#if PG_VERSION_NUM >= 170000 + 
prev_backup_checkpoint_request_hook = backup_checkpoint_request_hook; + backup_checkpoint_request_hook = ptrack_backup_checkpoint_request_hook; +#endif +} + +#if PG_VERSION_NUM >= 150000 +static void +ptrack_shmem_request(void) +{ + if (prev_shmem_request_hook) + prev_shmem_request_hook(); + RequestAddinShmemSpace(PtrackActualSize); } +#endif /* - * Module unload callback + * ptrack_shmem_startup hook: allocate or attach to shared memory. */ -void -_PG_fini(void) +static void +ptrack_shmem_startup_hook(void) { + bool map_found; -} + if (prev_shmem_startup_hook) + prev_shmem_startup_hook(); -/********************************************************************/ -/* Datapage bitmapping structures and routines taken from pg_rewind */ -/* TODO: consider moving to another location */ -struct datapagemap -{ - char *bitmap; - int bitmapsize; -}; -typedef struct datapagemap datapagemap_t; + /* + * Create or attach to the shared memory state + */ + LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE); -struct datapagemap_iterator -{ - datapagemap_t *map; - BlockNumber nextblkno; -}; -typedef struct datapagemap_iterator datapagemap_iterator_t; -static void datapagemap_add(datapagemap_t *map, BlockNumber blkno); + if (ptrack_map_size != 0) + { + ptrack_map = ShmemInitStruct("ptrack map", + PtrackActualSize, + &map_found); + if (!map_found) + { + ptrackMapInit(); + elog(DEBUG1, "Shared memory for ptrack is ready"); + } + } + else + { + ptrack_map = NULL; + } + LWLockRelease(AddinShmemInitLock); +} + +/* + * Ptrack follow up for copydir() routine. It parses database OID + * and tablespace OID from path string. We do not need to recursively + * walk subdirs here, copydir() will do it for us if needed. 
+ */ static void -datapagemap_add(datapagemap_t *map, BlockNumber blkno) +ptrack_copydir_hook(const char *path) { - int offset; - int bitno; + Oid spcOid = InvalidOid; + Oid dbOid = InvalidOid; + int oidchars; + char oidbuf[OIDCHARS + 1]; - offset = blkno / 8; - bitno = blkno % 8; + elog(DEBUG1, "ptrack_copydir_hook: path %s", path); - /* enlarge or create bitmap if needed */ - if (map->bitmapsize <= offset) + if (strstr(path, "global/") == path) + spcOid = GLOBALTABLESPACE_OID; + else if (strstr(path, "base/") == path) { - int oldsize = map->bitmapsize; - int newsize; + spcOid = DEFAULTTABLESPACE_OID; + oidchars = strspn(path + 5, "0123456789"); + strncpy(oidbuf, path + 5, oidchars); + oidbuf[oidchars] = '\0'; + dbOid = atooid(oidbuf); + } + else if (strstr(path, "pg_tblspc/") == path) + { + char *dbPos; + + oidchars = strspn(path + 10, "0123456789"); + strncpy(oidbuf, path + 10, oidchars); + oidbuf[oidchars] = '\0'; + spcOid = atooid(oidbuf); + + dbPos = strstr(path, TABLESPACE_VERSION_DIRECTORY) + strlen(TABLESPACE_VERSION_DIRECTORY) + 1; + oidchars = strspn(dbPos, "0123456789"); + strncpy(oidbuf, dbPos, oidchars); + oidbuf[oidchars] = '\0'; + dbOid = atooid(oidbuf); + } - /* - * The minimum to hold the new bit is offset + 1. But add some - * headroom, so that we don't need to repeatedly enlarge the bitmap in - * the common case that blocks are modified in order, from beginning - * of a relation to the end. 
- */ - newsize = offset + 1; - newsize += 10; + elog(DEBUG1, "ptrack_copydir_hook: spcOid %u, dbOid %u", spcOid, dbOid); - if (map->bitmap != NULL) - map->bitmap = repalloc(map->bitmap, newsize); - else - map->bitmap = palloc(newsize); + ptrack_walkdir(path, spcOid, dbOid); - /* zero out the newly allocated region */ - memset(&map->bitmap[oldsize], 0, newsize - oldsize); + if (prev_copydir_hook) + prev_copydir_hook(path); +} - map->bitmapsize = newsize; - } +static void +ptrack_mdwrite_hook(RelFileNodeBackend smgr_rnode, + ForkNumber forknum, BlockNumber blocknum) +{ + ptrack_mark_block(smgr_rnode, forknum, blocknum); + + if (prev_mdwrite_hook) + prev_mdwrite_hook(smgr_rnode, forknum, blocknum); +} + +static void +ptrack_mdextend_hook(RelFileNodeBackend smgr_rnode, + ForkNumber forknum, BlockNumber blocknum) +{ + ptrack_mark_block(smgr_rnode, forknum, blocknum); + + if (prev_mdextend_hook) + prev_mdextend_hook(smgr_rnode, forknum, blocknum); +} - /* Set the bit */ - map->bitmap[offset] |= (1 << bitno); +static void +ptrack_ProcessSyncRequests_hook() +{ + ptrackCheckpoint(); + + if (prev_ProcessSyncRequests_hook) + prev_ProcessSyncRequests_hook(); } -/********************************************************************/ +#if PG_VERSION_NUM >= 170000 +static void +ptrack_backup_checkpoint_request_hook(void) +{ + ptrack_set_init_lsn(); + + if (prev_backup_checkpoint_request_hook) + prev_backup_checkpoint_request_hook(); +} +#endif /* * Recursively walk through the path and add all data files to filelist. 
*/ static void ptrack_gather_filelist(List **filelist, char *path, Oid spcOid, Oid dbOid) { - DIR *dir; + DIR *dir; struct dirent *de; - dir = AllocateDir(path); while ((de = ReadDirExtended(dir, path, LOG)) != NULL) @@ -160,47 +316,66 @@ ptrack_gather_filelist(List **filelist, char *path, Oid spcOid, Oid dbOid) if (sret < 0) { - ereport(LOG, + ereport(WARNING, (errcode_for_file_access(), - errmsg("could not stat file \"%s\": %m", subpath))); + errmsg("ptrack: could not stat file \"%s\": %m", subpath))); continue; } if (S_ISREG(fst.st_mode)) { + if (fst.st_size == 0) + { + elog(DEBUG3, "ptrack: skip empty file %s", subpath); + + /* But try the next one */ + continue; + } + /* Regular file inside database directory, otherwise skip it */ if (dbOid != InvalidOid || spcOid == GLOBALTABLESPACE_OID) { - int oidchars; - char oidbuf[OIDCHARS + 1]; - char *segpath; +#if PG_VERSION_NUM >= 170000 + RelFileNumber relNumber; + unsigned segno; +#else + int oidchars; + char oidbuf[OIDCHARS + 1]; +#endif + char *segpath; PtrackFileList_i *pfl = palloc0(sizeof(PtrackFileList_i)); /* * Check that filename seems to be a regular relation file. */ +#if PG_VERSION_NUM >= 170000 + if (!parse_filename_for_nontemp_relation(de->d_name, &relNumber, &pfl->forknum, &segno)) + continue; +#else if (!parse_filename_for_nontemp_relation(de->d_name, &oidchars, &pfl->forknum)) continue; +#endif + /* Parse segno */ + segpath = strstr(de->d_name, "."); + pfl->segno = segpath != NULL ? atoi(segpath + 1) : 0; - /* Parse segno for main fork */ - if (pfl->forknum == MAIN_FORKNUM) - { - segpath = strstr(de->d_name, "."); - pfl->segno = segpath != NULL ? atoi(segpath + 1) : 0; - } - else - pfl->segno = 0; - + /* Fill the pfl in */ +#if PG_VERSION_NUM >= 170000 + nodeRel(pfl->relnode) = relNumber; +#else memcpy(oidbuf, de->d_name, oidchars); oidbuf[oidchars] = '\0'; - pfl->relnode.relNode = atooid(oidbuf); - pfl->relnode.dbNode = dbOid; - pfl->relnode.spcNode = spcOid == InvalidOid ? 
DEFAULTTABLESPACE_OID : spcOid; - pfl->path = GetRelationPath(dbOid, pfl->relnode.spcNode, - pfl->relnode.relNode, InvalidBackendId, pfl->forknum); + nodeRel(pfl->relnode) = atooid(oidbuf); +#endif + nodeDb(pfl->relnode) = dbOid; + nodeSpc(pfl->relnode) = spcOid == InvalidOid ? DEFAULTTABLESPACE_OID : spcOid; + pfl->path = GetRelationPath(dbOid, nodeSpc(pfl->relnode), + nodeRel(pfl->relnode), InvalidBackendId, pfl->forknum); *filelist = lappend(*filelist, pfl); - // elog(WARNING, "added file %s of rel %u to ptrack list", pfl->path, pfl->relnode.relNode); + + elog(DEBUG3, "ptrack: added file %s of rel %u to file list", + pfl->path, nodeRel(pfl->relnode)); } } else if (S_ISDIR(fst.st_mode)) @@ -211,40 +386,55 @@ ptrack_gather_filelist(List **filelist, char *path, Oid spcOid, Oid dbOid) else if (spcOid != InvalidOid && strcmp(de->d_name, TABLESPACE_VERSION_DIRECTORY) == 0) ptrack_gather_filelist(filelist, subpath, spcOid, InvalidOid); } - // TODO: is it enough to properly check symlink support? -#ifndef WIN32 + /* TODO: is it enough to properly check symlink support? 
*/ +#if !defined(WIN32) || (PG_VERSION_NUM >= 160000) else if (S_ISLNK(fst.st_mode)) #else else if (pgwin32_is_junction(subpath)) #endif { - /* We expect that symlinks with only digits in the name to be tablespaces */ + /* + * We expect that symlinks with only digits in the name to be + * tablespaces + */ if (strspn(de->d_name + 1, "0123456789") == strlen(de->d_name + 1)) ptrack_gather_filelist(filelist, subpath, atooid(de->d_name), InvalidOid); } } - FreeDir(dir); /* we ignore any error here */ + FreeDir(dir); /* we ignore any error here */ } static int -ptrack_filelist_getnext(PtScanCtx *ctx) +ptrack_filelist_getnext(PtScanCtx * ctx) { - PtrackFileList_i *pfl = NULL; - ListCell *cell; - char *fullpath; - struct stat fst; + PtrackFileList_i *pfl = NULL; + ListCell *cell; + char *fullpath; + struct stat fst; + uint32 rel_st_size = 0; + +get_next: /* No more file in the list */ if (list_length(ctx->filelist) == 0) return -1; +#ifdef foreach_current_index + /* Get last file from the tail */ + cell = list_tail(ctx->filelist); + pfl = (PtrackFileList_i *) lfirst(cell); + + /* Remove this file from the list */ + ctx->filelist = list_delete_last(ctx->filelist); +#else /* Get first file from the head */ cell = list_head(ctx->filelist); pfl = (PtrackFileList_i *) lfirst(cell); /* Remove this file from the list */ ctx->filelist = list_delete_first(ctx->filelist); +#endif if (pfl->segno > 0) { @@ -258,30 +448,40 @@ ptrack_filelist_getnext(PtScanCtx *ctx) ctx->relpath = pfl->path; } - ctx->bid.relnode.spcNode = pfl->relnode.spcNode; - ctx->bid.relnode.dbNode = pfl->relnode.dbNode; - ctx->bid.relnode.relNode = pfl->relnode.relNode; + nodeSpc(ctx->bid.relnode) = nodeSpc(pfl->relnode); + nodeDb(ctx->bid.relnode) = nodeDb(pfl->relnode); + nodeRel(ctx->bid.relnode) = nodeRel(pfl->relnode); ctx->bid.forknum = pfl->forknum; ctx->bid.blocknum = 0; if (stat(fullpath, &fst) != 0) { - elog(WARNING, "cannot stat file %s", fullpath); + elog(WARNING, "ptrack: cannot stat file %s", 
fullpath); /* But try the next one */ - return ptrack_filelist_getnext(ctx); + goto get_next; + } + + rel_st_size = fst.st_size; + + if (rel_st_size == 0) + { + elog(DEBUG3, "ptrack: skip empty file %s", fullpath); + + /* But try the next one */ + goto get_next; } if (pfl->segno > 0) { - ctx->relsize = pfl->segno * RELSEG_SIZE + fst.st_size / BLCKSZ; + ctx->relsize = pfl->segno * RELSEG_SIZE + rel_st_size / BLCKSZ; ctx->bid.blocknum = pfl->segno * RELSEG_SIZE; } else /* Estimate relsize as size of first segment in blocks */ - ctx->relsize = fst.st_size / BLCKSZ; + ctx->relsize = rel_st_size / BLCKSZ; - elog(DEBUG3, "got file %s with size %u from the ptrack list", pfl->path, ctx->relsize); + elog(DEBUG3, "ptrack: got file %s with size %u from the file list", pfl->path, ctx->relsize); return 0; } @@ -299,83 +499,37 @@ ptrack_version(PG_FUNCTION_ARGS) /* * Function to get last ptrack map initialization LSN. */ -PG_FUNCTION_INFO_V1(pg_ptrack_control_lsn); +PG_FUNCTION_INFO_V1(ptrack_init_lsn); Datum -pg_ptrack_control_lsn(PG_FUNCTION_ARGS) +ptrack_init_lsn(PG_FUNCTION_ARGS) { if (ptrack_map != NULL) - PG_RETURN_LSN(ptrack_map->init_lsn); + { + XLogRecPtr init_lsn = pg_atomic_read_u64(&ptrack_map->init_lsn); + + PG_RETURN_LSN(init_lsn); + } else { - elog(DEBUG1, "pg_ptrack_control_lsn(). no ptrack_map"); + elog(WARNING, "ptrack is disabled"); PG_RETURN_LSN(InvalidXLogRecPtr); } } -/* - * Function to retrieve blocks via buffercache. 
- */ -PG_FUNCTION_INFO_V1(pg_ptrack_get_block); -Datum -pg_ptrack_get_block(PG_FUNCTION_ARGS) -{ - Oid tablespace_oid = PG_GETARG_OID(0); - Oid db_oid = PG_GETARG_OID(1); - Oid relfilenode = PG_GETARG_OID(2); - BlockNumber blkno = PG_GETARG_UINT32(3); - bytea *raw_page; - char *raw_page_data; - Buffer buf; - RelFileNode rnode; - BlockNumber nblocks; - SMgrRelation smgr; - - rnode.dbNode = db_oid; - rnode.spcNode = tablespace_oid; - rnode.relNode = relfilenode; - - elog(DEBUG1, "pg_ptrack_get_block(%i, %i, %i, %u)", - tablespace_oid, db_oid, relfilenode, blkno); - smgr = smgropen(rnode, InvalidBackendId); - nblocks = smgrnblocks(smgr, MAIN_FORKNUM); - - if (blkno >= nblocks) - PG_RETURN_NULL(); - - /* Initialize buffer to copy to */ - raw_page = (bytea *) palloc0(BLCKSZ + VARHDRSZ); - SET_VARSIZE(raw_page, BLCKSZ + VARHDRSZ); - raw_page_data = VARDATA(raw_page); - - buf = ReadBufferWithoutRelcache(rnode, MAIN_FORKNUM, blkno, RBM_NORMAL, NULL); - - if (buf == InvalidBuffer) - elog(ERROR, "Block is not found in the buffer cache"); - - LockBuffer(buf, BUFFER_LOCK_SHARE); - - memcpy(raw_page_data, BufferGetPage(buf), BLCKSZ); - - LockBuffer(buf, BUFFER_LOCK_UNLOCK); - ReleaseBuffer(buf); - - PG_RETURN_BYTEA_P(raw_page); -} - /* * Return set of database blocks which were changed since specified LSN. * This function may return false positives (blocks that have not been updated). 
*/ -PG_FUNCTION_INFO_V1(pg_ptrack_get_pagemapset); +PG_FUNCTION_INFO_V1(ptrack_get_pagemapset); Datum -pg_ptrack_get_pagemapset(PG_FUNCTION_ARGS) +ptrack_get_pagemapset(PG_FUNCTION_ARGS) { - FuncCallContext *funcctx; - PtScanCtx *ctx; - MemoryContext oldcontext; - XLogRecPtr update_lsn; - datapagemap_t pagemap; - char gather_path[MAXPGPATH]; + PtScanCtx *ctx; + FuncCallContext *funcctx; + MemoryContext oldcontext; + datapagemap_t pagemap; + int64 pagecount = 0; + char gather_path[MAXPGPATH]; /* Exit immediately if there is no map */ if (ptrack_map == NULL) @@ -384,6 +538,7 @@ pg_ptrack_get_pagemapset(PG_FUNCTION_ARGS) if (SRF_IS_FIRSTCALL()) { TupleDesc tupdesc; + funcctx = SRF_FIRSTCALL_INIT(); oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); @@ -392,11 +547,15 @@ pg_ptrack_get_pagemapset(PG_FUNCTION_ARGS) ctx->lsn = PG_GETARG_LSN(0); ctx->filelist = NIL; - // get_call_result_type(fcinfo, NULL, &funcctx->tuple_desc); /* Make tuple descriptor */ - tupdesc = CreateTemplateTupleDesc(2); +#if PG_VERSION_NUM >= 120000 + tupdesc = CreateTemplateTupleDesc(3); +#else + tupdesc = CreateTemplateTupleDesc(3, false); +#endif TupleDescInitEntry(tupdesc, (AttrNumber) 1, "path", TEXTOID, -1, 0); - TupleDescInitEntry(tupdesc, (AttrNumber) 2, "pagemap", BYTEAOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 2, "pagecount", INT8OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 3, "pagemap", BYTEAOID, -1, 0); funcctx->tuple_desc = BlessTupleDesc(tupdesc); funcctx->user_fctx = ctx; @@ -432,17 +591,24 @@ pg_ptrack_get_pagemapset(PG_FUNCTION_ARGS) while (true) { + uint64 hash; + size_t slot1; + size_t slot2; + XLogRecPtr update_lsn1; + XLogRecPtr update_lsn2; + /* Stop traversal if there are no more segments */ - if (ctx->bid.blocknum > ctx->relsize) + if (ctx->bid.blocknum >= ctx->relsize) { /* We completed a segment and there is a bitmap to return */ if (pagemap.bitmap != NULL) { - Datum values[2]; - bool nulls[2] = {false}; - char pathname[MAXPGPATH]; 
- bytea *result = NULL; - Size result_sz = pagemap.bitmapsize + VARHDRSZ; + Datum values[3]; + bool nulls[3] = {false}; + char pathname[MAXPGPATH]; + bytea *result = NULL; + Size result_sz = pagemap.bitmapsize + VARHDRSZ; + HeapTuple htup = NULL; /* Create a bytea copy of our bitmap */ result = (bytea *) palloc(result_sz); @@ -452,32 +618,55 @@ pg_ptrack_get_pagemapset(PG_FUNCTION_ARGS) strcpy(pathname, ctx->relpath); values[0] = CStringGetTextDatum(pathname); - values[1] = PointerGetDatum(result); + values[1] = Int64GetDatum(pagecount); + values[2] = PointerGetDatum(result); pfree(pagemap.bitmap); pagemap.bitmap = NULL; pagemap.bitmapsize = 0; + pagecount = 0; - SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(heap_form_tuple(funcctx->tuple_desc, values, nulls))); - } - else - { - /* We have just processed unchanged file, let's pick next */ - if (ptrack_filelist_getnext(ctx) < 0) - SRF_RETURN_DONE(funcctx); + htup = heap_form_tuple(funcctx->tuple_desc, values, nulls); + if (htup) + SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(htup)); } + + if (ptrack_filelist_getnext(ctx) < 0) + SRF_RETURN_DONE(funcctx); } - update_lsn = pg_atomic_read_u64(&PtrackContent(ptrack_map)[BID_HASH_FUNC(ctx->bid)]); + hash = BID_HASH_FUNC(ctx->bid); + slot1 = (size_t)(hash % PtrackContentNblocks); - if (update_lsn != InvalidXLogRecPtr) - elog(DEBUG3, "update_lsn %X/%X of blckno %u of file %s", - (uint32) (update_lsn >> 32), (uint32) update_lsn, - ctx->bid.blocknum, ctx->relpath); + update_lsn1 = pg_atomic_read_u64(&ptrack_map->entries[slot1]); - /* Block has been changed since specified LSN. 
Mark it in the bitmap */ - if (update_lsn >= ctx->lsn) - datapagemap_add(&pagemap, ctx->bid.blocknum % ((BlockNumber) RELSEG_SIZE)); +#if USE_ASSERT_CHECKING + if (update_lsn1 != InvalidXLogRecPtr) + elog(DEBUG3, "ptrack: update_lsn1 %X/%X of blckno %u of file %s", + (uint32) (update_lsn1 >> 32), (uint32) update_lsn1, + ctx->bid.blocknum, ctx->relpath); +#endif + + /* Only probe the second slot if the first one is marked */ + if (update_lsn1 >= ctx->lsn) + { + slot2 = (size_t)(((hash << 32) | (hash >> 32)) % PtrackContentNblocks); + update_lsn2 = pg_atomic_read_u64(&ptrack_map->entries[slot2]); + +#if USE_ASSERT_CHECKING + if (update_lsn2 != InvalidXLogRecPtr) + elog(DEBUG3, "ptrack: update_lsn2 %X/%X of blckno %u of file %s", + (uint32) (update_lsn2 >> 32), (uint32) update_lsn2, + ctx->bid.blocknum, ctx->relpath); +#endif + + /* Block has been changed since specified LSN. Mark it in the bitmap */ + if (update_lsn2 >= ctx->lsn) + { + pagecount += 1; + datapagemap_add(&pagemap, ctx->bid.blocknum % ((BlockNumber) RELSEG_SIZE)); + } + } ctx->bid.blocknum += 1; } diff --git a/ptrack.control b/ptrack.control index de4dbdf..7e3a2b7 100644 --- a/ptrack.control +++ b/ptrack.control @@ -1,5 +1,5 @@ # ptrack extension -comment = 'public API for internal ptrack engine' -default_version = '2.0' +comment = 'block-level incremental backup engine' +default_version = '2.4' module_pathname = '$libdir/ptrack' relocatable = true diff --git a/ptrack.h b/ptrack.h new file mode 100644 index 0000000..abeffb3 --- /dev/null +++ b/ptrack.h @@ -0,0 +1,88 @@ +/*------------------------------------------------------------------------- + * + * ptrack.h + * header for ptrack map for tracking updates of relation's pages + * + * + * Copyright (c) 2019-2022, Postgres Professional + * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/ptrack.h + * + 
*------------------------------------------------------------------------- + */ +#ifndef PTRACK_H +#define PTRACK_H + +#include "access/xlogdefs.h" +#include "storage/block.h" +#include "storage/buf.h" +#if PG_VERSION_NUM >= 160000 +#include "storage/relfilelocator.h" +#else +#include "storage/relfilenode.h" +#endif +#include "storage/smgr.h" +#include "utils/relcache.h" + +/* Ptrack version as a string */ +#define PTRACK_VERSION "2.4" +/* Ptrack version as a number */ +#define PTRACK_VERSION_NUM 240 +/* Last ptrack version that changed map file format */ +#define PTRACK_MAP_FILE_VERSION_NUM 220 + +#if PG_VERSION_NUM >= 160000 +#define RelFileNode RelFileLocator +#define RelFileNodeBackend RelFileLocatorBackend +#define nodeDb(node) (node).dbOid +#define nodeSpc(node) (node).spcOid +#define nodeRel(node) (node).relNumber +#define nodeOf(ndbck) (ndbck).locator +#else +#define nodeDb(node) (node).dbNode +#define nodeSpc(node) (node).spcNode +#define nodeRel(node) (node).relNode +#define nodeOf(ndbck) (ndbck).node +#endif + +#if PG_VERSION_NUM >= 170000 +#define InvalidBackendId INVALID_PROC_NUMBER +#endif + +/* + * Structure identifying block on the disk. + */ +typedef struct PtBlockId +{ + RelFileNode relnode; + ForkNumber forknum; + BlockNumber blocknum; +} PtBlockId; + +/* + * Context for ptrack_get_pagemapset set returning function. + */ +typedef struct PtScanCtx +{ + XLogRecPtr lsn; + PtBlockId bid; + uint32 relsize; + char *relpath; + List *filelist; +} PtScanCtx; + +/* + * List item type for ptrack data files list. + */ +typedef struct PtrackFileList_i +{ + RelFileNode relnode; + ForkNumber forknum; + int segno; + char *path; + +} PtrackFileList_i; + +#endif /* PTRACK_H */ diff --git a/t/001_basic.pl b/t/001_basic.pl new file mode 100644 index 0000000..bdb1eca --- /dev/null +++ b/t/001_basic.pl @@ -0,0 +1,205 @@ +# +# Here we mostly do sanity checks and verify, that ptrack public API works +# as expected. 
Data integrity after incremental backups taken via ptrack +# is tested on the pg_probackup side. +# + +use strict; +use warnings; +use Test::More; + +my $pg_15_modules; + +BEGIN +{ + $pg_15_modules = eval + { + require PostgreSQL::Test::Cluster; + require PostgreSQL::Test::Utils; + return 1; + }; + + unless (defined $pg_15_modules) + { + $pg_15_modules = 0; + + require PostgresNode; + require TestLib; + } +} + +plan tests => 23; + +note('PostgreSQL 15 modules are used: ' . ($pg_15_modules ? 'yes' : 'no')); + +my $node; +my $res; +my $res_stdout; +my $res_stderr; + +# Create node. +# Older versions of PostgreSQL modules use get_new_node function. +# Newer use standard perl object constructor syntax. +eval +{ + if ($pg_15_modules) + { + $node = PostgreSQL::Test::Cluster->new("node"); + } + else + { + $node = PostgresNode::get_new_node("node"); + } +}; + +$node->init; +$node->start; + +# Could not load ptrack module after postmaster start +($res, $res_stdout, $res_stderr) = $node->psql("postgres", "CREATE EXTENSION ptrack"); +is($res, 3, 'errors out without shared_preload_libraries = \'ptrack\''); +like( + $res_stderr, + qr/ptrack module must be initialized by Postmaster/, + 'errors out without shared_preload_libraries = \'ptrack\''); + +# Load ptrack library +$node->append_conf( + 'postgresql.conf', q{ +wal_level = 'minimal' +shared_preload_libraries = 'ptrack' +log_min_messages = debug1 +}); +$node->restart; + +$node->safe_psql("postgres", "CREATE EXTENSION ptrack"); + +# Check some static functions +$node->safe_psql("postgres", "SELECT ptrack_version()"); + +# Could not use ptrack if disabled +($res, $res_stdout, $res_stderr) = $node->psql("postgres", "SELECT ptrack_get_pagemapset('0/0')"); +is($res, 3, 'errors out if ptrack is disabled'); +like( + $res_stderr, + qr/ptrack is disabled/, + 'errors out if ptrack is disabled'); +($res, $res_stdout, $res_stderr) = $node->psql("postgres", "SELECT ptrack_init_lsn()"); +is($res, 0, 'only warning if ptrack is disabled'); 
+like( + $res_stdout, + qr/0\/0/, + 'should print init LSN 0/0 if disabled'); +like( + $res_stderr, + qr/ptrack is disabled/, + 'warning if ptrack is disabled'); + +# Actually enable ptrack +$node->append_conf( + 'postgresql.conf', q{ +ptrack.map_size = 13 +}); +$node->stop; +$res = $node->start(fail_ok => 1); +is($res, 0, 'could not start with wal_level = \'minimal\''); +$node->append_conf( + 'postgresql.conf', q{ +wal_level = 'replica' +}); +$node->start; + +# Do checkpoint (test ptrack hook) +$node->safe_psql("postgres", "CHECKPOINT"); + +# Remember pg_current_wal_flush_lsn() value +my $flush_lsn = $node->safe_psql("postgres", "SELECT pg_current_wal_flush_lsn()"); + +# Remember ptrack init_lsn +my $init_lsn = $node->safe_psql("postgres", "SELECT ptrack_init_lsn()"); +unlike( + $init_lsn, + qr/0\/0/, + 'ptrack init LSN should not be 0/0 after CHECKPOINT'); + +# Ptrack map should survive crash +$node->stop('immediate'); +$node->start; +$res_stdout = $node->safe_psql("postgres", "SELECT ptrack_init_lsn()"); +is($res_stdout, $init_lsn, 'ptrack init_lsn should be the same after crash recovery'); + +# Do some stuff, which hits ptrack +$node->safe_psql("postgres", "CREATE DATABASE ptrack_test"); +$node->safe_psql("postgres", "CREATE TABLE ptrack_test AS SELECT i AS id FROM generate_series(0, 1000) i"); + +# Remember DB and relation oids +my $db_oid = $node->safe_psql("postgres", "SELECT oid FROM pg_database WHERE datname = 'ptrack_test'"); +my $rel_oid = $node->safe_psql("postgres", "SELECT relfilenode FROM pg_class WHERE relname = 'ptrack_test'"); + +# Data should survive clean restart +$node->restart; +$res_stdout = $node->safe_psql("postgres", "SELECT ptrack_get_pagemapset('$flush_lsn')"); +like( + $res_stdout, + qr/base\/$db_oid/, + 'ptrack pagemapset should contain new database oid'); +like( + $res_stdout, + qr/$rel_oid/, + 'ptrack pagemapset should contain new relation oid'); + +# Check change stats +$res_stdout = $node->safe_psql("postgres", "SELECT pages FROM 
ptrack_get_change_stat('$flush_lsn')"); +is($res_stdout > 0, 1, 'should be able to get aggregated stats of changes'); + +# We should be able to change ptrack map size (but lose all changes) +$node->append_conf( + 'postgresql.conf', q{ +ptrack.map_size = 14 +}); +$node->restart; + +$node->safe_psql("postgres", "CHECKPOINT"); +$res_stdout = $node->safe_psql("postgres", "SELECT ptrack_init_lsn()"); +unlike( + $res_stdout, + qr/0\/0/, + 'ptrack init LSN should not be 0/0 after CHECKPOINT'); +ok($res_stdout ne $init_lsn, 'ptrack init_lsn should not be the same after map resize'); +$res_stdout = $node->safe_psql("postgres", "SELECT ptrack_get_pagemapset('$flush_lsn')"); +unlike( + $res_stdout, + qr/base\/$db_oid/, + 'we should loose changes after ptrack map resize'); + +# We should be able to turn off ptrack and clean up all files by setting ptrack.map_size = 0 +$node->append_conf( + 'postgresql.conf', q{ +ptrack.map_size = 0 +}); +$node->restart; + +# Check that we have lost everything +ok(! -f $node->data_dir . "/global/ptrack.map", "ptrack.map should be cleaned up"); +ok(! -f $node->data_dir . "/global/ptrack.map.tmp", "ptrack.map.tmp should be cleaned up"); + +($res, $res_stdout, $res_stderr) = $node->psql("postgres", "SELECT ptrack_get_pagemapset('0/0')"); +is($res, 3, 'errors out if ptrack is disabled'); +like( + $res_stderr, + qr/ptrack is disabled/, + 'errors out if ptrack is disabled'); +($res, $res_stdout, $res_stderr) = $node->psql("postgres", "SELECT ptrack_init_lsn()"); +is($res, 0, 'only warning if ptrack is disabled'); +like( + $res_stdout, + qr/0\/0/, + 'should print init LSN 0/0 if disabled'); +like( + $res_stderr, + qr/ptrack is disabled/, + 'warning if ptrack is disabled'); + +$node->stop; + +done_testing;