-
-
Save Dyrcona/846170f956dc19ccf865bc6e30a09f94 to your computer and use it in GitHub Desktop.
gpcq -- Generate a query to check the Evergreen actor.usr_purge_data function
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env perl | |
| use strict; | |
| use warnings; | |
| =head1 NAME | |
| gpcq - Generate a Purge Check Query | |
| =head1 SYNOPSIS | |
| C<gpcq actor.usr_purge_data.sql E<gt> find.sql> | |
| =head1 DESCRIPTION | |
| Run this script on a dump of the function definition of the | |
| actor.usr_purge_data function from your Evergreen installation to | |
| generate a query to find deleted users with unpurged data. You may | |
| then use the output of this query to purge thosse users' data. | |
| You may dump the function definition with the following command line, | |
| adding appropriate connection parameters for your system: | |
| C<psql -c "select pg_get_functiondef('actor.usr_purge_data'::regproc)" \ | |
| -A -t -o actor.usr_purge_data.sql> | |
| After that, you may generate the query with a command like in the synopsis, | |
| or you can pipe the output directly into psql like so: | |
| C<gpcq actor.usr_purge_data.sql | psql --csv -o unpurged_users.csv> | |
| Again, you will need to add any connection parameters as appropriate | |
| to psql. The use of the C<--csv> option and the output filename are | |
| suggestions. You can, of course, get the output in any format you | |
| prefer. | |
| The generated query will dump the id, usrname, and deletion date of | |
| the user from the actor.usr table as well as an array of the table | |
| names where the unpurged data appears for each patron that has been | |
| deleted but has has some unpurged data remaining. | |
| The query is generated from the update and delete statements in the | |
| actor.usr_purge_data function definition. What this function does can | |
| vary by Evergreen release, so this is why you must dump the function | |
| defintion to a file in order for gpcq to parse it. | |
| gpcq takes no options other than the filename of the function definition. | |
| =head1 AUTHOR | |
| Jason Stephenson <[email protected]> | |
| =head1 COPYRIGHT AND LICENSE | |
| Copyright 2025 C/W MARS, Inc. | |
| gpcq is free software: you can redistribute it and/or modify | |
| it under the terms of the GNU General Public License as published by | |
| the Free Software Foundation, either version 2 of the License, or | |
| (at your option) any later version. | |
| gpcq is distributed in the hope that it will be useful, | |
| but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| GNU General Public License for more details. | |
| You should have received a copy of the GNU General Public License | |
| along with gpcq. If not, see <http://www.gnu.org/licenses/>. | |
| =cut | |
| my %data = (); | |
| if (open my $fh, "<", $ARGV[0]) { | |
| my ($table, $field); | |
| while (my $line = <$fh>) { | |
| if ($line =~ /DELETE\s+FROM\s+([^[:space:]]+)\s+WHERE\s([^[:space:]]+)\s+=\s+src_usr/) { | |
| $table = $1; | |
| $field = $2; | |
| } elsif ($line =~ /UPDATE\s+([^[:space:]]+)/) { | |
| $table = $1; | |
| } | |
| if ($line =~ /SET\s+([^[:space:]]+)\s+=\s+dest_usr/) { | |
| $field = $1; | |
| } | |
| if ($field) { | |
| add_to_data($table, $field); | |
| } | |
| undef($field); | |
| } | |
| close($fh); | |
| } | |
| print("SELECT id, usrname, delete_date, array_agg(table_name) as tables\n"); | |
| print("FROM (\n"); | |
| my $times_used = 0; | |
| foreach my $k (sort keys %data) { | |
| if ($times_used) { | |
| print("UNION\n"); | |
| } | |
| print("SELECT DISTINCT usr${times_used}.id, usr${times_used}.usrname, "); | |
| print("usr${times_used}.create_date as delete_date, '$k' as table_name\n"); | |
| print("FROM actor.usr usr${times_used}\n"); | |
| print("JOIN $k ON "); | |
| my $table = $1 if ($k =~ /^.*\.(.*)$/); | |
| my @fields = @{$data{$k}}; | |
| if (@fields > 1) { | |
| my $f = 0; | |
| print("("); | |
| foreach my $field (@fields) { | |
| if ($f++) { | |
| print(" OR "); | |
| } | |
| print("${table}.${field} = usr${times_used}.id"); | |
| } | |
| print(")\n"); | |
| } else { | |
| print("${table}.${fields[0]} = usr${times_used}.id\n"); | |
| } | |
| print("WHERE usr${times_used}.deleted\n"); | |
| $times_used++; | |
| } | |
| print(")\nGROUP BY id, usrname, delete_date\n"); | |
| print("ORDER BY delete_date, id\n"); | |
| sub add_to_data { | |
| my $table = shift; | |
| my $field = shift; | |
| unless (defined $data{$table}) { | |
| $data{$table} = (); | |
| } | |
| push @{$data{$table}}, $field; | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment