mirror of
https://github.com/bookwyrm-social/bookwyrm.git
synced 2025-01-22 06:58:07 +00:00
Merge pull request #714 from bcj/bcj/automatic-backups
Turn on Automatic backups for the DB
This commit is contained in:
commit
593d5d309a
8 changed files with 657 additions and 2 deletions
14
README.md
14
README.md
|
@ -175,7 +175,6 @@ Instructions for running BookWyrm in production:
|
||||||
- Comment out the `command: certonly...` line in `docker-compose.yml`
|
- Comment out the `command: certonly...` line in `docker-compose.yml`
|
||||||
- Run docker-compose in the background with: `docker-compose up -d`
|
- Run docker-compose in the background with: `docker-compose up -d`
|
||||||
- Initialize the database with: `./bw-dev initdb`
|
- Initialize the database with: `./bw-dev initdb`
|
||||||
- Set up schedule backups with cron that runs that `docker-compose exec db pg_dump -U <databasename>` and saves the backup to a safe location
|
|
||||||
|
|
||||||
Congrats! You did it, go to your domain and enjoy the fruits of your labors.
|
Congrats! You did it, go to your domain and enjoy the fruits of your labors.
|
||||||
|
|
||||||
|
@ -205,3 +204,16 @@ There are three concepts in the book data model:
|
||||||
|
|
||||||
Whenever a user interacts with a book, they are interacting with a specific edition. Every work has a default edition, but the user can select other editions. Reviews are aggregated for all editions of a work when you view an edition's page.
|
Whenever a user interacts with a book, they are interacting with a specific edition. Every work has a default edition, but the user can select other editions. Reviews are aggregated for all editions of a work when you view an edition's page.
|
||||||
|
|
||||||
|
### Backups
|
||||||
|
|
||||||
|
Bookwyrm's db service dumps a backup copy of its database to its `/backups` directory daily at midnight UTC.
|
||||||
|
Backups are named `backup__%Y-%m-%d.sql`.
|
||||||
|
|
||||||
|
The db service has an optional script for periodically pruning the backups directory so that all recent daily backups are kept, but for older backups, only weekly or monthly backups are kept.
|
||||||
|
To enable this script:
|
||||||
|
- Uncomment the final line in `postgres-docker/cronfile`
|
||||||
|
- Rebuild your instance with `docker-compose up --build`
|
||||||
|
|
||||||
|
You can copy backups from the backups volume to your host machine with `docker cp`:
|
||||||
|
- Run `docker-compose ps` to confirm the db service's full name (it's probably `bookwyrm_db_1`).
|
||||||
|
- Run `docker cp <container_name>:/backups <host machine path>`
|
||||||
|
|
|
@ -27,6 +27,8 @@ services:
|
||||||
db:
|
db:
|
||||||
build: postgres-docker
|
build: postgres-docker
|
||||||
env_file: .env
|
env_file: .env
|
||||||
|
entrypoint: /bookwyrm-entrypoint.sh
|
||||||
|
command: cron postgres
|
||||||
volumes:
|
volumes:
|
||||||
- pgdata:/var/lib/postgresql/data
|
- pgdata:/var/lib/postgresql/data
|
||||||
- backups:/backups
|
- backups:/backups
|
||||||
|
|
|
@ -3,8 +3,18 @@ FROM postgres:latest
|
||||||
# crontab
|
# crontab
|
||||||
RUN mkdir /backups
|
RUN mkdir /backups
|
||||||
COPY ./backup.sh /backups
|
COPY ./backup.sh /backups
|
||||||
|
COPY ./weed.sh /backups
|
||||||
COPY ./cronfile /etc/cron.d/cronfile
|
COPY ./cronfile /etc/cron.d/cronfile
|
||||||
RUN apt-get update && apt-get -y install cron
|
RUN apt-get update && apt-get -y install cron
|
||||||
RUN chmod 0644 /etc/cron.d/cronfile
|
RUN chmod 0644 /etc/cron.d/cronfile
|
||||||
RUN crontab /etc/cron.d/cronfile
|
RUN crontab /etc/cron.d/cronfile
|
||||||
RUN touch /var/log/cron.log
|
RUN touch /var/log/cron.log
|
||||||
|
|
||||||
|
# The postgres image's entrypoint expects the docker command to only contain flags to
|
||||||
|
# pass postgres. It runs the entrypoint twice, the second time as the postgres user.
|
||||||
|
# We need to start the cron service the first time it runs, when it's still being run
|
||||||
|
# as the root user. We're going to add a check that looks at the first argument and
|
||||||
|
# if it's 'cron', starts the service and then removes that argument.
|
||||||
|
RUN awk '$0 ~ /^\t_main "\$@"$/ { print "\tif [[ $1 == cron ]]; then\n\t\tservice cron start\n\t\tshift\n\tfi" }{ print }' docker-entrypoint.sh > bookwyrm-entrypoint.sh
|
||||||
|
RUN chown postgres /bookwyrm-entrypoint.sh
|
||||||
|
RUN chmod u=rwx,go=r /bookwyrm-entrypoint.sh
|
||||||
|
|
|
@ -1,2 +1,5 @@
|
||||||
0 0 * * * /backups/backup.sh
|
0 0 * * * /backups/backup.sh
|
||||||
|
# If uncommented, this script will weed the backups directory. It will keep the 14
|
||||||
|
# most-recent backups, then one backup/week for the next four backups, then one
|
||||||
|
# backup/month after that.
|
||||||
|
# 0 1 * * * /backups/weed.sh -d 14 -w 4 -m -1 /backups
|
||||||
|
|
8
postgres-docker/tests/Dockerfile
Normal file
8
postgres-docker/tests/Dockerfile
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
# Image used to run the weed.sh test suite (see tests/docker-compose.yaml).
FROM postgres:latest

# Use apt-get rather than apt: apt warns that its command-line interface is not
# stable enough for scripts. The package lists are removed afterwards because
# nothing else is installed in this image.
RUN apt-get update \
    && apt-get install -y shellcheck \
    && rm -rf /var/lib/apt/lists/*

# The test runner, which is also the container's entrypoint.
COPY ./tests/testing-entrypoint.sh /testing-entrypoint.sh
RUN chmod u+rx,go=r /testing-entrypoint.sh

# The script under test. The build context is the parent directory so that this
# file is reachable.
COPY ./weed.sh /weed.sh
RUN chmod u+rx,go=r /weed.sh
|
9
postgres-docker/tests/docker-compose.yaml
Normal file
9
postgres-docker/tests/docker-compose.yaml
Normal file
|
@ -0,0 +1,9 @@
|
||||||
|
version: "3"
|
||||||
|
|
||||||
|
services:
|
||||||
|
weeding:
|
||||||
|
build:
|
||||||
|
# We need to build from the parent directory so we can access weed.sh
|
||||||
|
context: ..
|
||||||
|
dockerfile: ./tests/Dockerfile
|
||||||
|
entrypoint: /testing-entrypoint.sh
|
426
postgres-docker/tests/testing-entrypoint.sh
Normal file
426
postgres-docker/tests/testing-entrypoint.sh
Normal file
|
@ -0,0 +1,426 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
# These tests are written to run in their own container, using the same image as the
|
||||||
|
# actual postgres service. To run: `docker-compose up --build`
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
source /weed.sh
|
||||||
|
|
||||||
|
ERROR_COUNT=0
|
||||||
|
FAILURE_COUNT=0
|
||||||
|
|
||||||
|
# compare_files <expected> <actual>
# Compare two sorted files that list one path per line.
#
# When the files match, print nothing and return 0. Otherwise print 'fail'
# followed by the lines that only appear in <expected> ("missing files") and the
# lines that only appear in <actual> ("extra files"), add one to the global
# FAILURE_COUNT, and return 1.
function compare_files {
    local expected="$1"
    local actual="$2"

    local -a missing=()
    local -a extra=()
    local line=""
    local entry=""

    # diff marks lines that are only in the first file with "< " and lines that
    # are only in the second file with "> ". Everything else it prints (hunk
    # headers such as "1d0") is ignored. diff exits non-zero when the files
    # differ, but only its output matters here. Reading line by line avoids
    # having to change IFS globally.
    while IFS= read -r line; do
        if [[ "$line" == '< '* ]]; then
            missing+=("${line:2}")
        elif [[ "$line" == '> '* ]]; then
            extra+=("${line:2}")
        fi
    done < <(diff --suppress-common-lines "$expected" "$actual")

    if [[ $((${#missing[@]} + ${#extra[@]})) -eq 0 ]]; then
        return 0
    fi

    echo 'fail'

    if [[ ${#missing[@]} -gt 0 ]]; then
        printf '\t%s missing files:\n' "${#missing[@]}"
        for entry in "${missing[@]}"; do
            printf '\t\t%s\n' "$entry"
        done
    fi

    if [[ ${#extra[@]} -gt 0 ]]; then
        printf '\t%s extra files:\n' "${#extra[@]}"
        for entry in "${extra[@]}"; do
            printf '\t\t%s\n' "$entry"
        done
    fi

    FAILURE_COUNT=$((FAILURE_COUNT + 1))

    return 1
}
|
||||||
|
|
||||||
|
# show_captured_stderr <file>
# Print the contents of <file> under a 'stderr:' heading, if it has any content.
function show_captured_stderr {
    local captured="$1"

    if [[ -s "$captured" ]]; then
        echo 'stderr:'
        cat "$captured"
    fi
}

# record_error <file>
# Report the current test as errored: print 'error', add one to the global
# ERROR_COUNT, and show whatever was captured on stderr in <file>.
function record_error {
    echo 'error'
    ERROR_COUNT=$((ERROR_COUNT + 1))
    show_captured_stderr "$1"
}

# run_weed_phases <daily> <weekly> <monthly> <expected> <actual> <remaining> <stderr-file>
# Run the three phases of a test against /testing, stopping at the first phase
# that errors or whose output does not match:
#   1. call weed_directory directly as a function,
#   2. run /weed.sh as a script with the dry-run flag (-l),
#   3. run /weed.sh for real and check which files are left.
# Prints 'pass' only when all three phases succeed. Always returns 0; results
# are reported through stdout, ERROR_COUNT and (via compare_files) FAILURE_COUNT.
function run_weed_phases {
    local daily="$1"
    local weekly="$2"
    local monthly="$3"
    local expected="$4"
    local actual="$5"
    local remaining="$6"
    local captured="$7"
    local status=0

    # Part one: call the function directly
    (
        weed_directory "/testing" "$daily" "$weekly" "$monthly" 2> "$captured" \
            | sort > "$actual"
    ) || status="$?"

    if [[ "$status" -ne 0 ]]; then
        record_error "$captured"
        return 0
    fi

    if ! compare_files "$expected" "$actual"; then
        show_captured_stderr "$captured"
        return 0
    fi

    # Part two: as a script with the dry-run flag (-l)
    status=0
    (
        "/weed.sh" -d "$daily" -w "$weekly" -m "$monthly" -l "/testing" 2> "$captured" \
            | sort > "$actual"
    ) || status="$?"

    if [[ "$status" -ne 0 ]]; then
        record_error "$captured"
        return 0
    fi

    if ! compare_files "$expected" "$actual"; then
        show_captured_stderr "$captured"
        return 0
    fi

    # Part three: let's try actually deleting files
    status=0
    (
        "/weed.sh" -d "$daily" -w "$weekly" -m "$monthly" "/testing" 2> "$captured"
    ) || status="$?"

    if [[ "$status" -ne 0 ]]; then
        record_error "$captured"
        return 0
    fi

    # After a real run, the files left on disk must be exactly the ones that were
    # not flagged for deletion (plus the bookkeeping files used by the test).
    find /testing -type f | sort > "$actual"

    if compare_files "$remaining" "$actual"; then
        echo 'pass'
    else
        show_captured_stderr "$captured"
    fi

    return 0
}

# perform_test <daily_threshold> <weekly_threshold> <monthly_threshold> [file...]
# This is a wrapper function that handles creating a directory with test files in
# it, running weed_directory (as the function, as a dry run, then finally
# actually deleting files), marking the test as failed/errored as necessary, then
# cleaning up after itself. The first three arguments are the thresholds to pass
# into weed_directory. The remaining arguments are names of files to create for
# the test. Bash isn't great at passing arrays, so instead of separately passing
# in a list of expected results, flag the files you expect to be deleted by
# prepending "DELETE:" to the path.
#
# Must be called directly from a test_... function: the name of the caller is
# used as the label printed for the test.
function perform_test {
    echo "${FUNCNAME[1]}" | sed 's/^test_\(.*\)$/\1/' | tr '_\n' ' :'
    echo -en '\t'

    local daily_threshold="$1"
    local weekly_threshold="$2"
    local monthly_threshold="$3"
    shift 3

    # We might as well name the files we're using for running tests in as
    # inflammatory a way as possible to increase the chances that bad filtering
    # by weed_directory results in tests failing.
    local expected="/testing/expected/backup__2020-02-02.sql"
    local actual="/testing/backup__2020-02-02.sql.actual"
    local remaining="/testing/remainbackup__2020-02-02.sql"
    local temp="/testing/backup__2020-TE-MP.sql"

    local spec=""
    local path=""

    # Create the test files. "$expected" starts out empty; "$remaining" starts out
    # listing the bookkeeping files themselves, since those will still exist when
    # the directory is listed at the end of the test.
    mkdir -p /testing/expected
    : > "$expected"
    printf '%s\n' "$expected" "$actual" "$remaining" "$temp" > "$remaining"

    for spec in "$@"; do
        if [[ "$spec" =~ ^DELETE: ]]; then
            path="/testing/${spec:7}"
            echo "$path" >> "$expected"
        else
            path="/testing/$spec"
            echo "$path" >> "$remaining"
        fi

        mkdir -p "$(dirname "$path")"
        touch "$path"
    done

    # We don't make any promise about the order files will be listed in by
    # weed_directory (it is currently reverse-chronological). We should sort the
    # output and the expected file instead of forcing tests to list files in that
    # order (or causing tests to fail if weed_directory's order changes).
    sort "$expected" > "$temp"
    mv "$temp" "$expected"
    sort "$remaining" > "$temp"
    mv "$temp" "$remaining"

    run_weed_phases \
        "$daily_threshold" "$weekly_threshold" "$monthly_threshold" \
        "$expected" "$actual" "$remaining" "$temp"

    rm -rf /testing
}
|
||||||
|
|
||||||
|
# actual tests
|
||||||
|
function test_shellcheck {
    # Lint the script under test and then the test suite itself (writing bash is
    # hard). -x lets shellcheck follow the `source /weed.sh` in the test suite.
    printf 'running shellcheck on scripts:\t'
    shellcheck /weed.sh
    shellcheck -x /testing-entrypoint.sh
    printf 'pass\n'
}
|
||||||
|
|
||||||
|
function test_empty_directory {
    # No backup files are created, so nothing may be listed or deleted.
    local -a thresholds=(1 2 3)

    perform_test "${thresholds[@]}"
}
|
||||||
|
|
||||||
|
function test_single_file {
    # A lone backup is never flagged for deletion.
    local -a files=("backup__2021-02-02.sql")

    perform_test 1 2 3 "${files[@]}"
}
|
||||||
|
|
||||||
|
function test_keep_everything {
    # A daily threshold of -1 means backups are never reduced below daily
    # frequency, so every file is expected to survive.
    local -a files=(
        "backup__2021-02-02.sql"
        "backup__2021-02-01.sql"
        "backup__2021-01-31.sql"
    )

    perform_test -1 0 0 "${files[@]}"
}
|
||||||
|
|
||||||
|
function test_keep_one {
    # One daily backup is kept; weekly and monthly are skipped (0), so the older
    # backups from the same year are expected to be deleted.
    local -a files=(
        "backup__2021-02-02.sql"
        "DELETE:backup__2021-02-01.sql"
        "DELETE:backup__2021-01-31.sql"
    )

    perform_test 1 0 0 "${files[@]}"
}
|
||||||
|
|
||||||
|
function test_weekly {
    # weed.sh groups weekly backups using date's %W (week number of the year,
    # with Monday as the first day of the week) together with the year.
    #   backup__2021-03-08.sql: Monday (keep)
    #   backup__2021-03-07.sql: Sunday (keep)
    #   backup__2021-02-28.sql: Sunday (keep)
    #   backup__2021-02-22.sql: Monday (delete)
    #   backup__2021-02-20.sql: Saturday (keep)
    #   backup__2021-02-16.sql: Tuesday (delete)
    #   backup__2021-02-15.sql: Monday (delete)
    #   backup__2021-02-14.sql: Sunday (keep)
    #   backup__2020-02-14.sql: Friday (same week number, different year) (keep)
    local -a files=(
        "backup__2021-03-08.sql"
        "backup__2021-03-07.sql"
        "backup__2021-02-28.sql"
        "DELETE:backup__2021-02-22.sql"
        "backup__2021-02-20.sql"
        "DELETE:backup__2021-02-16.sql"
        "DELETE:backup__2021-02-15.sql"
        "backup__2021-02-14.sql"
        "backup__2020-02-14.sql"
    )

    perform_test 0 -1 0 "${files[@]}"
}
|
||||||
|
|
||||||
|
function test_monthly {
    # One daily backup, no weekly backups (0), then monthly backups without
    # limit (-1).
    local -a files=(
        "backup__2021-03-08.sql"
        "DELETE:backup__2021-03-07.sql"
        "backup__2021-02-28.sql"
        "DELETE:backup__2021-02-22.sql"
        "DELETE:backup__2021-02-20.sql"
        "DELETE:backup__2021-02-16.sql"
        "DELETE:backup__2021-02-15.sql"
        "DELETE:backup__2021-02-14.sql"
        "backup__2021-01-14.sql"
        "backup__2020-01-13.sql"
    )

    perform_test 1 0 -1 "${files[@]}"
}
|
||||||
|
|
||||||
|
function test_annual {
    # Daily, weekly and monthly are all skipped (0), so only the most recent
    # backup of each year is expected to survive.
    local -a files=(
        "backup__2021-03-08.sql"
        "DELETE:backup__2021-03-07.sql"
        "DELETE:backup__2021-02-28.sql"
        "DELETE:backup__2021-02-22.sql"
        "DELETE:backup__2021-02-20.sql"
        "DELETE:backup__2021-02-16.sql"
        "DELETE:backup__2021-02-15.sql"
        "DELETE:backup__2021-02-14.sql"
        "DELETE:backup__2021-01-14.sql"
        "backup__2020-01-13.sql"
        "backup__2019-12-31.sql"
        "DELETE:backup__2019-01-13.sql"
    )

    perform_test 0 0 0 "${files[@]}"
}
|
||||||
|
|
||||||
|
# Will not pass while maxdepth is set to 1. The name deliberately does not start
# with "test_" so that the test runner does not pick it up.
function skip_test_sort_order {
    # Backups alternate between two subdirectories so that sorting by path alone
    # would not give chronological order.
    local -a files=(
        "a/backup__2021-03-08.sql"
        "DELETE:b/backup__2021-03-07.sql"
        "DELETE:a/backup__2021-02-28.sql"
        "DELETE:b/backup__2021-02-22.sql"
        "DELETE:a/backup__2021-02-20.sql"
        "DELETE:b/backup__2021-02-16.sql"
        "DELETE:a/backup__2021-02-15.sql"
        "DELETE:b/backup__2021-02-14.sql"
        "DELETE:a/backup__2021-01-14.sql"
        "b/backup__2020-01-13.sql"
        "a/backup__2019-12-31.sql"
        "DELETE:b/backup__2019-01-13.sql"
    )

    perform_test 0 0 1 "${files[@]}"
}
|
||||||
|
|
||||||
|
function test_ignore_subdirectories {
    # The backup inside the subdirectory must be left alone, and neither file is
    # expected to be deleted.
    local -a files=(
        "a/backup__2021-03-08.sql"
        "backup__2021-03-07.sql"
    )

    perform_test 0 0 0 "${files[@]}"
}
|
||||||
|
|
||||||
|
function test_standard {
    # A realistic run: 14 daily backups, then 4 weekly backups, then 1 monthly
    # backup, then annual backups.
    local -a files=(
        "backup__2021-03-08.sql"
        "backup__2021-03-07.sql"
        "backup__2021-03-06.sql"
        "backup__2021-03-05.sql"
        "backup__2021-03-04.sql"
        "backup__2021-03-03.sql"
        "backup__2021-03-02.sql"
        "backup__2021-03-01.sql"
        "backup__2021-02-28.sql"
        "backup__2021-02-27.sql"
        "backup__2021-02-26.sql"
        "backup__2021-02-25.sql"
        "backup__2021-02-24.sql"
        "backup__2021-02-23.sql"
        "DELETE:backup__2021-02-22.sql"
        "backup__2021-02-21.sql"
        "DELETE:backup__2021-02-20.sql"
        "DELETE:backup__2021-02-19.sql"
        "DELETE:backup__2021-02-18.sql"
        "DELETE:backup__2021-02-17.sql"
        "DELETE:backup__2021-02-16.sql"
        "DELETE:backup__2021-02-15.sql"
        "backup__2021-02-14.sql"
        "DELETE:backup__2021-02-13.sql"
        "DELETE:backup__2021-02-12.sql"
        "DELETE:backup__2021-02-11.sql"
        "DELETE:backup__2021-02-10.sql"
        "DELETE:backup__2021-02-09.sql"
        "DELETE:backup__2021-02-08.sql"
        "backup__2021-02-07.sql"
        "DELETE:backup__2021-02-06.sql"
        "DELETE:backup__2021-02-05.sql"
        "DELETE:backup__2021-02-04.sql"
        "DELETE:backup__2021-02-03.sql"
        "DELETE:backup__2021-02-02.sql"
        "DELETE:backup__2021-02-01.sql"
        "backup__2021-01-31.sql"
        "DELETE:backup__2021-01-30.sql"
        "DELETE:backup__2021-01-29.sql"
        "DELETE:backup__2021-01-28.sql"
        "DELETE:backup__2021-01-27.sql"
        "DELETE:backup__2021-01-26.sql"
        "DELETE:backup__2021-01-25.sql"
        "DELETE:backup__2021-01-24.sql"
        "DELETE:backup__2021-01-23.sql"
        "DELETE:backup__2021-01-22.sql"
        "DELETE:backup__2021-01-21.sql"
        "DELETE:backup__2021-01-20.sql"
        "DELETE:backup__2021-01-19.sql"
        "DELETE:backup__2021-01-18.sql"
        "DELETE:backup__2021-01-17.sql"
        "DELETE:backup__2021-01-16.sql"
        "DELETE:backup__2021-01-15.sql"
        "DELETE:backup__2021-01-14.sql"
        "DELETE:backup__2021-01-13.sql"
        "DELETE:backup__2021-01-12.sql"
        "DELETE:backup__2021-01-11.sql"
        "DELETE:backup__2021-01-10.sql"
        "DELETE:backup__2021-01-09.sql"
        "DELETE:backup__2021-01-08.sql"
        "DELETE:backup__2021-01-07.sql"
        "DELETE:backup__2021-01-06.sql"
        "DELETE:backup__2021-01-05.sql"
        "DELETE:backup__2021-01-04.sql"
        "DELETE:backup__2021-01-03.sql"
        "DELETE:backup__2021-01-02.sql"
        "DELETE:backup__2021-01-01.sql"
        "backup__2020-12-31.sql"
    )

    perform_test 14 4 1 "${files[@]}"
}
|
||||||
|
|
||||||
|
# tests
# Run every function in this file whose definition line has the form
# `function test_... {`, in definition order, then print a summary.
#
# Reads the globals ERROR_COUNT and FAILURE_COUNT, which the tests update as they
# run. Prints 'success' when both are zero and 'failure' otherwise; the exit
# status is left for the caller to decide.
function tests {
    local total=0
    local test_name=""

    # The list of test names is read on file descriptor 3 so that the tests keep
    # the caller's stdin instead of inheriting (and possibly consuming) the list.
    # The names come straight from `function` lines in this file, so they are
    # called directly; no eval is needed.
    while IFS= read -r test_name <&3; do
        "$test_name"
        total=$((total + 1))
    done 3< <(awk '$1 == "function" && $2 ~ "^test_" {print $2}' "${BASH_SOURCE[0]}")

    echo "------------------"
    echo "$((total - ERROR_COUNT - FAILURE_COUNT))/$total tests passed"

    if [[ $((FAILURE_COUNT + ERROR_COUNT)) -eq 0 ]]; then
        echo 'success'
        return 0
    fi

    if [[ "$ERROR_COUNT" -gt 0 ]]; then
        echo "$ERROR_COUNT tests errored"
    fi

    if [[ "$FAILURE_COUNT" -gt 0 ]]; then
        echo "$FAILURE_COUNT tests failed"
    fi

    echo 'failure'
}
|
||||||
|
|
||||||
|
# Only run the suite when this file is executed directly (not when it is sourced).
if [[ "${BASH_SOURCE[0]}" -ef "$0" ]]; then
    # If anything makes the script exit while the tests are running, say where.
    trap 'echo -e "\\terror (in ${FUNCNAME[1]} ${BASH_SOURCE[1]}:${BASH_LINENO[1]})\naborting"' EXIT
    tests
    trap - EXIT

    # Exit non-zero when any test failed or errored.
    if ((FAILURE_COUNT + ERROR_COUNT > 0)); then
        exit 1
    fi
fi
|
185
postgres-docker/weed.sh
Executable file
185
postgres-docker/weed.sh
Executable file
|
@ -0,0 +1,185 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
# Weed old backups. See HELP for details.
|
||||||
|
# Tests for this script can be found in:
|
||||||
|
# bookwyrm/postgres-docker/tests/testing-entrypoint.sh
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
# Default thresholds. 0 skips a frequency entirely; -1 means backups are never
# reduced below that frequency (see HELP).
DAILY_THRESHOLD=14
WEEKLY_THRESHOLD=4
MONTHLY_THRESHOLD=-1

# Text printed by `weed.sh -h`. A trailing backslash joins a line with the next
# one, so each paragraph is emitted as a single line.
HELP="\
NAME

weed -- remove old backups from the backups directory

SYNOPSIS

weed.sh [-d threshold] [-w threshold] [-m threshold] [-l] backup_directory

DESCRIPTION

Reduce the number of backups by only keeping a certain number of daily backups before \
reducing the frequency to weekly, monthly, and then finally annually.

For each threshold, setting it to 0 will skip that frequency (e.g., setting weekly to \
0 will mean backups go directly from daily to monthly), and setting it to -1 will \
never reduce backups to a lower frequency (e.g., setting weekly to -1 will mean \
backups never are reduced to monthly backups).

-d threshold: Store this many daily backups before switching to weekly \
(default $DAILY_THRESHOLD)

-w threshold: Store this many weekly backups before switching to monthly \
(default $WEEKLY_THRESHOLD)

-m threshold: Store this many monthly backups before switching to annual \
(default $MONTHLY_THRESHOLD)

-l: Dry run. List the files that would be deleted.
"
|
||||||
|
|
||||||
|
# fail <message>
# Write "weed: <message>" to stderr, then exit the current shell (or subshell)
# with status 1. The message is printed literally: backslash sequences in it are
# not interpreted, which matters because messages can contain user input.
function fail {
    printf 'weed: %s\n' "${1-}" >&2
    exit 1
}
|
||||||
|
|
||||||
|
# parse_threshold <hopefully-a-number>
# Validate a threshold and print it in plain decimal form on stdout.
# Thresholds should be a non-negative number (or -1 for no threshold). Anything
# else is reported through fail, which exits with status 1.
function parse_threshold {
    local raw="${1-}"
    local value=0

    if [[ ! "$raw" =~ ^(-?)([0-9]+)$ ]]; then
        fail "Invalid threshold: $raw"
    fi

    # Force base 10: bash arithmetic would otherwise read a leading zero as octal,
    # so a value such as 08 would be an arithmetic error instead of 8.
    value=$((10#${BASH_REMATCH[2]}))
    if [[ -n "${BASH_REMATCH[1]}" ]]; then
        value=$((-value))
    fi

    if [[ "$value" -lt -1 ]]; then
        fail "Invalid threshold: $raw"
    fi

    echo "$value"
}
|
||||||
|
|
||||||
|
# weed_directory <directory> <daily_threshold> <weekly_threshold> <monthly_threshold>
# List files to be deleted, one path per line on stdout, newest first. Nothing is
# deleted here.
#
# Backups are walked from newest to oldest. The first <daily_threshold> distinct
# days are kept, then the first <weekly_threshold> distinct weeks, then the first
# <monthly_threshold> distinct months, and after that one backup per year. A
# threshold of 0 skips that frequency; -1 means that frequency never runs out.
# Any backup that falls in the same period as the most recently kept backup is
# listed for deletion.
function weed_directory {
    local directory=$1
    local daily_threshold=$2
    local weekly_threshold=$3
    local monthly_threshold=$4

    local count=0
    local thresholds=("$daily_threshold" "$weekly_threshold" "$monthly_threshold" -1)
    local date_formats=("%Y %m %d" "%Y %W" "%Y %m" "%Y")
    local index=0
    local last_date=""
    local last_format=""
    local date=""
    local file=""
    local format=""
    local date_file=""

    # We would like to loop through all the backup files in the backup directory in
    # reverse-chronological order. Bookwyrm backup files are named such that
    # chronological and lexical order match. So we should be safe to find all backup
    # files and reverse sort them. We should be terrified of deleting a backup an
    # instance maintainer wants to keep, so we will be extra cautious. We're ignoring
    # any subdirectories in case someone moves an important backup into a meaningfully
    # named folder. We are also prepending the date to the path before sorting so that
    # the ordering would be correct even if we were allowed to find backup files in
    # subdirectories where chronological and lexical order don't match.
    #
    # The list is read line by line (rather than word-split) so that a backup
    # directory whose path contains whitespace is handled correctly.
    while IFS= read -r date_file; do
        # Each line is the 10-character date immediately followed by the path.
        date="${date_file:0:10}"
        file="${date_file:10}"

        # Move on to the next frequency once the current one has kept as many
        # backups as it is allowed to. We can't fall off the end because we set
        # annual backups to unlimited. It seems unlikely that instance maintainers
        # would have enough concern about the space one backup/year takes to
        # warrant supporting a cutoff.
        while [[ ${thresholds[index]} -ne -1 && $count -ge ${thresholds[index]} ]]; do
            index=$((index + 1))
            last_format=""
            count=0
        done

        if [[ -z "$last_date" ]]; then
            # The newest backup is always kept.
            count=$((count + 1))
            last_date=$date
            last_format=""
        else
            # last_format is cleared whenever the frequency changes, so recompute
            # the period of the most recently kept backup at the current frequency.
            if [[ -z "$last_format" ]]; then
                last_format=$(date --date="$last_date" +"${date_formats[index]}")
            fi

            format=$(date --date="$date" +"${date_formats[index]}")

            if [[ "$format" == "$last_format" ]]; then
                # Same period as a backup we are already keeping: weed it.
                echo "$file"
            else
                count=$((count + 1))
                last_date="$date"
                last_format="$format"
            fi
        fi
    done < <(
        find "$directory" \
            -maxdepth 1 \
            -name 'backup__[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]\.sql' \
            | sed 's/\(^.*backup__\([0-9-]*\)\.sql$\)/\2\1/' \
            | sort --reverse
    )
}
|
||||||
|
|
||||||
|
# main [-h] [-d threshold] [-w threshold] [-m threshold] [-l] backup_directory
# Command-line entry point. Parses the options described in HELP, asks
# weed_directory which backups to weed, then either lists them on stdout (-l,
# dry run) or deletes them. Progress and the final summary are written to stderr.
# Exits with status 1 (through fail) on invalid usage.
function main(){
    local daily_threshold=$DAILY_THRESHOLD
    local weekly_threshold=$WEEKLY_THRESHOLD
    local monthly_threshold=$MONTHLY_THRESHOLD
    local dry_run=""

    # Keep getopts' state local so that main can be called more than once in the
    # same shell and always starts parsing at the first argument.
    local OPTION=""
    local OPTARG=""
    local OPTIND=1

    # The leading ':' selects getopts' silent error reporting. A missing argument
    # then yields ':' and an unknown option yields '?', both with the offending
    # option letter in OPTARG, so the messages below can name it.
    while getopts ":hd:w:m:l" OPTION; do
        case "$OPTION" in
            h)
                echo "$HELP"
                exit
                ;;
            d)
                daily_threshold=$(parse_threshold "$OPTARG") || exit 1
                ;;
            w)
                weekly_threshold=$(parse_threshold "$OPTARG") || exit 1
                ;;
            m)
                monthly_threshold=$(parse_threshold "$OPTARG") || exit 1
                ;;
            l)
                dry_run="true"
                ;;
            :)
                fail "Missing argument for '$OPTARG'. To see help run: weed.sh -h"
                ;;
            *)
                fail "Unknown option '$OPTARG'. To see help run: weed.sh -h"
                ;;
        esac
    done
    shift "$((OPTIND - 1))"

    if [[ $# -ne 1 ]]; then
        fail "expected a single argument, directory"
    fi

    # Collect the complete list before touching anything. If listing fails, stop
    # without deleting a single file.
    local doomed=""
    doomed=$(weed_directory "$1" "$daily_threshold" "$weekly_threshold" "$monthly_threshold") || exit 1

    local count=0
    local file=""
    local optional_words=""

    # One path per line; reading line by line keeps paths with spaces intact.
    while IFS= read -r file; do
        # An empty list still produces one empty line from the here-string.
        if [[ -z "$file" ]]; then
            continue
        fi

        count=$((count + 1))
        if [[ -n "$dry_run" ]]; then
            echo "$file"
        else
            echo "deleting $file" >&2
            rm -- "$file"
        fi
    done <<< "$doomed"

    if [[ -n "$dry_run" ]]; then
        optional_words="would be "
    fi
    printf '%s files %sdeleted\n' "$count" "$optional_words" >&2
}
|
||||||
|
|
||||||
|
# Run main only when this file is executed directly. When it is sourced (as the
# test suite does), only the functions above are defined.
if [[ "${BASH_SOURCE[0]}" -ef "$0" ]]; then
    main "$@"
fi
|
Loading…
Reference in a new issue