Last active
December 16, 2024 19:42
-
-
Save aamir814/092ed85bd90e28d79af029e561c1da88 to your computer and use it in GitHub Desktop.
Test sync_replication_slot to keep the logical replication running during a failover
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env bash | |
| ##################################################################### | |
| # Purpose: | |
| # The following steps show how to use the new PostgreSQL parameter | |
| # sync_replication_slot introduced in version 17 to keep logical | |
| # replication running during a failover or major upgrade. | |
| # | |
| # The setup includes three RDS PostgreSQL instances: Primary (PRI), | |
| # Read Replica (RR), and Logical Replica (LR). After successfully | |
| # creating the logical replication, the idea is to promote the RR | |
| # (to simulate failover) and update the LR (subscriber) to point to | |
| # the new primary instance without re-syncing the data. | |
| # | |
| # Assumption: | |
| # * VPC Security group must exists and should allow your IP to connect to RDS Instances. | |
| # * AWS CLI must be configured. | |
| # * Must have connectivity to the RDS instance. | |
| # | |
| # REFERENCES | |
| # - https://docs.aws.amazon.com/cli/latest/reference/rds/create-db-instance-read-replica.html | |
| # - https://repost.aws/knowledge-center/rds-postgresql-use-logical-replication | |
| # - https://docs.aws.amazon.com/cli/latest/reference/rds/#cli-aws-rds | |
| # - https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/Appendix.PostgreSQL.CommonDBATasks.pglogical.slot.synchronization.html | |
| # - https://www.postgresql.org/docs/current/sql-altersubscription.html | |
| # - https://docs.aws.amazon.com/cli/latest/reference/rds/create-blue-green-deployment.html | |
| ##################################################################### | |
| # change as needed | |
| export DB_INSTANCE_NAME=pgtest-17 | |
| export DB_INSTANCE_NAME_LR=pgtest-17-lr | |
| export DB_INSTANCE_NAME_RR=pgtest-17-rr | |
| export VPC_SG_IDS=sg-06797e8215f149043 | |
| export DB_ENGINE=postgres | |
| export DB_VERSION=17.1 | |
| export DB_PASSWORD=ChangeM3 | |
| export PGPASSWORD=$DB_PASSWORD | |
| # create parameter group for primary and read replica | |
| aws rds create-db-parameter-group \ | |
| --db-parameter-group-name pg-$DB_INSTANCE_NAME \ | |
| --db-parameter-group-family postgres17 \ | |
| --description "logical replication test with sync_replication_slots" | |
| aws rds create-db-parameter-group \ | |
| --db-parameter-group-name pg-$DB_INSTANCE_NAME_RR \ | |
| --db-parameter-group-family postgres17 \ | |
| --description "logical replication test with sync_replication_slots" | |
| # create parameter group for logical replica | |
| aws rds create-db-parameter-group \ | |
| --db-parameter-group-name pg-$DB_INSTANCE_NAME_LR \ | |
| --db-parameter-group-family postgres17 \ | |
| --description "logical replication test with sync_replication_slots" | |
| # modify the parameter group to set wal_level to logical. | |
| # add the following DB parameters as needed. For this test default is good. | |
| #max_wal_senders | |
| #max_replication_slots | |
| #max_connections | |
| aws rds modify-db-parameter-group \ | |
| --db-parameter-group-name pg-$DB_INSTANCE_NAME \ | |
| --parameters "ParameterName='rds.logical_replication',ParameterValue=1,ApplyMethod=pending-reboot" \ | |
| "ParameterName='sync_replication_slots',ParameterValue=1,ApplyMethod=pending-reboot" | |
| # Update rds.logical_slot_sync_dbname to desire database | |
| aws rds modify-db-parameter-group \ | |
| --db-parameter-group-name pg-$DB_INSTANCE_NAME_RR \ | |
| --parameters "ParameterName='rds.logical_replication',ParameterValue=1,ApplyMethod=pending-reboot" \ | |
| "ParameterName='hot_standby_feedback',ParameterValue='1',ApplyMethod=pending-reboot" \ | |
| "ParameterName='rds.logical_slot_sync_dbname',ParameterValue='postgres',ApplyMethod=pending-reboot" \ | |
| "ParameterName='sync_replication_slots',ParameterValue=1,ApplyMethod=pending-reboot" | |
| # create instances with public access (not recommended in Production) | |
| aws rds create-db-instance \ | |
| --db-instance-identifier $DB_INSTANCE_NAME \ | |
| --allocated-storage 20 \ | |
| --storage-encrypted \ | |
| --db-instance-class db.t3.medium \ | |
| --engine $DB_ENGINE \ | |
| --master-username postgres \ | |
| --master-user-password $DB_PASSWORD \ | |
| --vpc-security-group-ids $VPC_SG_IDS \ | |
| --engine-version $DB_VERSION \ | |
| --no-multi-az | |
| aws rds create-db-instance \ | |
| --db-instance-identifier $DB_INSTANCE_NAME_LR \ | |
| --allocated-storage 20 \ | |
| --storage-encrypted \ | |
| --db-instance-class db.t3.medium \ | |
| --engine $DB_ENGINE \ | |
| --master-username postgres \ | |
| --master-user-password $DB_PASSWORD \ | |
| --vpc-security-group-ids $VPC_SG_IDS \ | |
| --engine-version $DB_VERSION \ | |
| --no-multi-az | |
| #create Read Replica | |
| aws rds create-db-instance-read-replica \ | |
| --db-instance-identifier $DB_INSTANCE_NAME_RR \ | |
| --source-db-instance-identifier $DB_INSTANCE_NAME \ | |
| --vpc-security-group-ids $VPC_SG_IDS | |
| # verfiy instances role | |
| aws rds describe-db-instances \ | |
| --query "*[].{DBInstanceIdentifier:DBInstanceIdentifier,ReadReplica:ReadReplicaDBInstanceIdentifiers[0],ReadReplicaSourceDB:ReadReplicaSourceDBInstanceIdentifier}" \ | |
| --output table | |
| :' | |
| ------------------------------------------------------------------ | |
| | DescribeDBInstances | | |
| +----------------------+-----------------+-----------------------+ | |
| | DBInstanceIdentifier | ReadReplica | ReadReplicaSourceDB | | |
| +----------------------+-----------------+-----------------------+ | |
| | upgtest-17 | upgtest-17-rr | None | | |
| | upgtest-17-lr | None | None | | |
| | upgtest-17-rr | None | upgtest-17 | | |
| +----------------------+-----------------+-----------------------+ | |
| ' | |
| # get endpoint name to connect to instance | |
| DB_PRI=$(aws rds describe-db-instances \ | |
| --db-instance-identifier $DB_INSTANCE_NAME \ | |
| --query "*[].Endpoint.Address" \ | |
| --output text) | |
| DB_RR=$(aws rds describe-db-instances \ | |
| --db-instance-identifier $DB_INSTANCE_NAME_RR \ | |
| --query "*[].Endpoint.Address" \ | |
| --output text) | |
| DB_LR=$(aws rds describe-db-instances \ | |
| --db-instance-identifier $DB_INSTANCE_NAME_LR \ | |
| --query "*[].Endpoint.Address" \ | |
| --output text) | |
| # echo $DB_PRI $DB_RR $DB_LR | |
| # get the physical slot name from the primary for the synchronized_standby_slots parameter | |
| PHYSICAL_SLOT=$(psql -h $DB_PRI -U postgres -t -c "SELECT slot_name FROM pg_replication_slots where slot_type='physical'") | |
| # echo $PHYSICAL_SLOT | |
| # Modify primary parameter group with Physical standby slot name | |
| aws rds modify-db-parameter-group \ | |
| --db-parameter-group-name pg-$DB_INSTANCE_NAME \ | |
| --parameters "ParameterName='synchronized_standby_slots',ParameterValue=$PHYSICAL_SLOT,ApplyMethod=pending-reboot" | |
| # assign parameter groups to db instances | |
| aws rds modify-db-instance \ | |
| --db-instance-identifier $DB_INSTANCE_NAME \ | |
| --db-parameter-group-name pg-$DB_INSTANCE_NAME \ | |
| --apply-immediately | |
| aws rds modify-db-instance \ | |
| --db-instance-identifier $DB_INSTANCE_NAME_RR \ | |
| --db-parameter-group-name pg-$DB_INSTANCE_NAME_RR \ | |
| --apply-immediately | |
| # restart instances | |
| aws rds reboot-db-instance \ | |
| --db-instance-identifier $DB_INSTANCE_NAME | |
| aws rds reboot-db-instance \ | |
| --db-instance-identifier $DB_INSTANCE_NAME_RR | |
| # assign parameter group to the logical replication instance | |
| aws rds modify-db-instance \ | |
| --db-instance-identifier $DB_INSTANCE_NAME_LR \ | |
| --db-parameter-group-name pg-$DB_INSTANCE_NAME_LR \ | |
| --apply-immediately | |
| # restart LR instance | |
| aws rds reboot-db-instance \ | |
| --db-instance-identifier $DB_INSTANCE_NAME_LR | |
| # verify parameters | |
| # These parameters must be set in order for LR to work | |
| # after failover. | |
| psql -h $DB_PRI -U postgres -c "SELECT name,setting FROM pg_settings WHERE name IN ('wal_level','rds.logical_replication','sync_replication_slots','synchronized_standby_slots')" | |
| :' output | |
| name | setting | |
| ----------------------------+--------------------------------------------- | |
| rds.logical_replication | on | |
| sync_replication_slots | on | |
| synchronized_standby_slots | rds_us_east_1_db_dn2uyr2436rexq3u7gcdfzw4hy | |
| wal_level | logical | |
| (4 rows) | |
| ' | |
| # rds.logical_slot_sync_dbname is not available in pg_settings | |
| # make sure to update it if your database is not "postgres." | |
| psql -h $DB_RR -U postgres -c "SELECT name,setting FROM pg_settings WHERE name IN ('wal_level','rds.logical_replication','sync_replication_slots','hot_standby_feedback','rds.logical_slot_sync_dbname')" | |
| :' output | |
| name | setting | |
| -------------------------+--------- | |
| hot_standby_feedback | on | |
| rds.logical_replication | on | |
| sync_replication_slots | on | |
| wal_level | logical | |
| (4 rows) | |
| ' | |
| # Connect to the source database and create the tables and publication: | |
| psql -h $DB_PRI -U postgres -c "CREATE TABLE reptab1 (slno int primary key); | |
| CREATE TABLE reptab2 (name varchar(20)); | |
| --Insert data into the source tables: | |
| INSERT INTO reptab1 VALUES (generate_series(1,1000)); | |
| INSERT INTO reptab2 SELECT SUBSTR ('ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789',((random()*(36-1)+1)::integer),1) FROM generate_series(1,50); | |
| --Create a publication for the source tables. Use a SELECT query to verify the details of the publication that was created: | |
| CREATE PUBLICATION testpub FOR TABLE reptab1,reptab2; | |
| SELECT * FROM pg_publication; | |
| --Verify that the source tables are added to the publication: | |
| SELECT * FROM pg_publication_tables; | |
| " | |
| :'output | |
| pubname | schemaname | tablename | attnames | rowfilter | |
| ---------+------------+-----------+----------+----------- | |
| testpub | public | reptab1 | {slno} | | |
| testpub | public | reptab2 | {name} | | |
| (2 rows) | |
| ' | |
| # Connect to the LR and create the target tables with the | |
| # same names as the source tables. | |
| # Be sure that there's no data present in the target tables by | |
| # running a SELECT query on the target tables: | |
| psql -h $DB_LR -U postgres -c "CREATE TABLE reptab1 (slno int primary key); | |
| CREATE TABLE reptab2 (name varchar(20));" | |
| psql -h $DB_LR -U postgres -c "SELECT count(*) FROM reptab1;" | |
| psql -h $DB_LR -U postgres -c "SELECT count(*) FROM reptab2;" | |
| # Create and verify subscriptions in the target database | |
| # Important: To avoid storing a plaintext version of your user name and password in the database logs, before creating the subscription, run the following commands: | |
| # SET log_min_messages to 'PANIC'; | |
| # SET log_statement to NONE; | |
| # run the create subscription command on ec2 instance inside the same VPC | |
| # so hostname resolves to private IP or | |
| # modify the security group to allow connections from public IP addresses. | |
| aws ec2 authorize-security-group-ingress \ | |
| --group-id $VPC_SG_IDS \ | |
| --protocol tcp \ | |
| --port 5432 \ | |
| --cidr $(nslookup $DB_PRI | grep Address | tail -1 | awk '{print $2"/32"}') | |
| aws ec2 authorize-security-group-ingress \ | |
| --group-id $VPC_SG_IDS \ | |
| --protocol tcp \ | |
| --port 5432 \ | |
| --cidr $(nslookup $DB_RR | grep Address | tail -1 | awk '{print $2"/32"}') | |
| aws ec2 authorize-security-group-ingress \ | |
| --group-id $VPC_SG_IDS \ | |
| --protocol tcp \ | |
| --port 5432 \ | |
| --cidr $(nslookup $DB_LR | grep Address | tail -1 | awk '{print $2"/32"}') | |
| # can only execute "create subscription" in the command line | |
| psql -h $DB_LR -U postgres -c "CREATE SUBSCRIPTION testsub CONNECTION 'host=$DB_PRI port=5432 dbname=postgres user=postgres password=$DB_PASSWORD' | |
| PUBLICATION testpub WITH (failover = true);" | |
| :'output | |
| NOTICE: created replication slot "testsub" on publisher | |
| CREATE SUBSCRIPTION | |
| ' | |
| # verify subscription | |
| psql -h $DB_LR -U postgres -c "SELECT oid,subname,subenabled,subslotname,subpublications FROM pg_subscription;" | |
| :'output | |
| oid | subname | subenabled | subslotname | subpublications | |
| -------+---------+------------+-------------+----------------- | |
| 24611 | testsub | t | testsub | {testpub} | |
| (1 row) | |
| ' | |
| # verify that data has been copied to LR | |
| psql -h $DB_LR -U postgres -c "SELECT count(*) FROM reptab1;" | |
| :'output | |
| count | |
| ------- | |
| 1000 | |
| (1 row) | |
| ' | |
| psql -h $DB_LR -U postgres -c "SELECT count(*) FROM reptab2;" | |
| :'output | |
| count | |
| ------- | |
| 50 | |
| (1 row) | |
| ' | |
| # Verify the replication slot in the PRI database | |
| psql -h $DB_PRI -U postgres -c "SELECT slot_name, pg_size_pretty(pg_wal_lsn_diff(pg_current_wal_lsn(),restart_lsn)) AS replicationSlotLag, | |
| active, failover, synced FROM pg_replication_slots" | |
| :'output | |
| slot_name | replicationslotlag | active | failover | synced | |
| ---------------------------------------------+--------------------+--------+----------+-------- | |
| testsub | 56 bytes | t | t | f | |
| rds_us_east_1_db_dn2uyr2436rexq3u7gcdfzw4hy | 0 bytes | t | f | f | |
| (2 rows) | |
| ' | |
| # The logical slot also exist in the RR instance | |
| psql -h $DB_RR -U postgres -c "select slot_name, slot_type, active, failover, synced from pg_replication_slots;" | |
| :'output | |
| slot_name | slot_type | active | failover | synced | |
| -----------+-----------+--------+----------+-------- | |
| testsub | logical | f | t | t | |
| (1 row) | |
| ' | |
| # Test the CDC by inserting data in the source tables | |
| psql -h $DB_PRI -U postgres -c "INSERT INTO reptab1 VALUES(generate_series(1001,2000)); | |
| INSERT INTO reptab2 SELECT SUBSTR('ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789',((random()*(36-1)+1)::integer),1) FROM generate_series(1,50);" | |
| psql -h $DB_PRI -U postgres -c "SELECT count(*) FROM reptab1;" | |
| :'output | |
| count | |
| ------- | |
| 2000 | |
| (1 row) | |
| ' | |
| psql -h $DB_PRI -U postgres -c "SELECT count(*) FROM reptab2;" | |
| :'output | |
| count | |
| ------- | |
| 100 | |
| (1 row) | |
| ' | |
| # verifying number of rows in the LR matches | |
| psql -h $DB_LR -U postgres -c "SELECT count(*) FROM reptab1;" | |
| :'output | |
| count | |
| ------- | |
| 2000 | |
| (1 row) | |
| ' | |
| psql -h $DB_LR -U postgres -c "SELECT count(*) FROM reptab2;" | |
| :'output | |
| count | |
| ------- | |
| 100 | |
| (1 row) | |
| ' | |
| ###################################################### | |
| ####################### TEST ######################## | |
| ###################################################### | |
| # Now we have everything set up. Let's promote the RR and | |
| # point LR to RR | |
| # Promote RR and see if logical replication still works. | |
| aws rds promote-read-replica \ | |
| --db-instance-identifier $DB_INSTANCE_NAME_RR | |
| # Alter the subscription in the LR instance to point to RR (which is now writable) | |
| psql -h $DB_LR -U postgres -c "ALTER SUBSCRIPTION testsub CONNECTION 'host=$DB_RR port=5432 dbname=postgres user=postgres password=$DB_PASSWORD';" | |
| # test by inserting rows in the promoted RR | |
| psql -h $DB_RR -U postgres -c "INSERT INTO reptab1 VALUES(generate_series(2001,3000)); | |
| INSERT INTO reptab2 SELECT SUBSTR('ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789',((random()*(36-1)+1)::integer),1) FROM generate_series(1,50);" | |
| # verify the row count in LR | |
| psql -h $DB_LR -U postgres -c "SELECT count(*) FROM reptab1;" | |
| :'output | |
| count | |
| ------- | |
| 3000 | |
| (1 row) | |
| ' | |
| psql -h $DB_LR -U postgres -c "SELECT count(*) FROM reptab2;" | |
| :'output | |
| count | |
| ------- | |
| 150 | |
| (1 row) | |
| ' | |
| # The replication slots in DB_PRI is not active | |
| psql -h $DB_PRI -U postgres -c "SELECT slot_name, pg_size_pretty(pg_wal_lsn_diff(pg_current_wal_lsn(),restart_lsn)) AS replicationSlotLag, | |
| active FROM pg_replication_slots" | |
| :'output | |
| slot_name | replicationslotlag | active | |
| -----------+--------------------+-------- | |
| testsub | 192 MB | f | |
| (1 row) | |
| ' | |
| # The replication slots in DB_RR is active | |
| psql -h $DB_RR -U postgres -c "SELECT slot_name, pg_size_pretty(pg_wal_lsn_diff(pg_current_wal_lsn(),restart_lsn)) AS replicationSlotLag, | |
| active FROM pg_replication_slots" | |
| :'output | |
| slot_name | replicationslotlag | active | |
| -----------+--------------------+-------- | |
| testsub | 56 bytes | t | |
| (1 row) | |
| ' | |
| ###################################################### | |
| ##################### CLEAN-UP ####################### | |
| ###################################################### | |
| # remove security group inbound rules | |
| aws ec2 revoke-security-group-ingress \ | |
| --group-id $VPC_SG_IDS \ | |
| --protocol tcp \ | |
| --port 5432 \ | |
| --cidr $(nslookup $DB_PRI | grep Address | tail -1 | awk '{print $2"/32"}') | |
| aws ec2 revoke-security-group-ingress \ | |
| --group-id $VPC_SG_IDS \ | |
| --protocol tcp \ | |
| --port 5432 \ | |
| --cidr $(nslookup $DB_RR | grep Address | tail -1 | awk '{print $2"/32"}') | |
| aws ec2 revoke-security-group-ingress \ | |
| --group-id $VPC_SG_IDS \ | |
| --protocol tcp \ | |
| --port 5432 \ | |
| --cidr $(nslookup $DB_LR | grep Address | tail -1 | awk '{print $2"/32"}') | |
| # Delete instances | |
| aws rds delete-db-instance \ | |
| --db-instance-identifier $DB_INSTANCE_NAME \ | |
| --skip-final-snapshot | |
| aws rds delete-db-instance \ | |
| --db-instance-identifier $DB_INSTANCE_NAME_RR \ | |
| --skip-final-snapshot | |
| aws rds delete-db-instance \ | |
| --db-instance-identifier $DB_INSTANCE_NAME_LR \ | |
| --skip-final-snapshot | |
| # Delete parameter groups | |
| aws rds delete-db-parameter-group \ | |
| --db-parameter-group-name pg-$DB_INSTANCE_NAME | |
| aws rds delete-db-parameter-group \ | |
| --db-parameter-group-name pg-$DB_INSTANCE_NAME_RR | |
| aws rds delete-db-parameter-group \ | |
| --db-parameter-group-name pg-$DB_INSTANCE_NAME_LR | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment