Dyrcona · September 20, 2025 14:41
diff --git a/gpcq b/gpcq
 #!/usr/bin/env perl
 use strict;
 use warnings;

 =head1 NAME

 gpcq - Generate a Purge Check Query

 =head1 SYNOPSIS

 C<gpcq actor.usr_purge_data.sql E<gt> find.sql>

 =head1 DESCRIPTION

 Run this script on a dump of the function definition of the
 actor.usr_purge_data function from your Evergreen installation to
 generate a query to find deleted users with unpurged data.  You may
 then use the output of this query to purge thosse users' data.

 You may dump the function definition with the following command line,
 adding appropriate connection parameters for your system:

 C<psql -c "select pg_get_functiondef('actor.usr_purge_data'::regproc)" \
  -A -t -o actor.usr_purge_data.sql>

 After that, you may generate the query with a command like in the synopsis,
 or you can pipe the output directly into psql like so:

 C<gpcq actor.usr_purge_data.sql | psql --csv -o unpurged_users.csv>

 Again, you will need to add any connection parameters as appropriate
 to psql.  The use of the C<--csv> option and the output filename are
 suggestions.  You can, of course, get the output in any format you
 prefer.

 The generated query will dump the id, usrname, and deletion date of
 the user from the actor.usr table as well as an array of the table
 names where the unpurged data appears for each patron that has been
 deleted but has has some unpurged data remaining.

 The query is generated from the update and delete statements in the
 actor.usr_purge_data function definition.  What this function does can
 vary by Evergreen release, so this is why you must dump the function
 defintion to a file in order for gpcq to parse it.

 gpcq takes no options other than the filename of the function definition.

 =head1 AUTHOR

 Jason Stephenson <[email protected]>

 =head1 COPYRIGHT AND LICENSE

 Copyright 2025 C/W MARS, Inc.

 gpcq is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation, either version 2 of the License, or
 (at your option) any later version.

 gpcq is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.
 You should have received a copy of the GNU General Public License
 along with gpcq.  If not, see <http://www.gnu.org/licenses/>.

 =cut

 my %data = ();

 if (open my $fh, "<", $ARGV[0]) {
    my ($table, $field);
    while (my $line = <$fh>) {
        if ($line =~ /DELETE\s+FROM\s+([^[:space:]]+)\s+WHERE\s([^[:space:]]+)\s+=\s+src_usr/) {
            $table = $1;
            $field = $2;
        } elsif ($line =~ /UPDATE\s+([^[:space:]]+)/) {
            $table = $1;
        }
        if ($line =~ /SET\s+([^[:space:]]+)\s+=\s+dest_usr/) {
            $field = $1;
        }
        if ($field) {
            add_to_data($table, $field);
        }
        undef($field);
    }
    close($fh);
 }

 print("SELECT id, usrname, delete_date, array_agg(table_name) as tables\n");
 print("FROM (\n");
 my $times_used = 0;
 foreach my $k (sort keys %data) {
    if ($times_used) {
        print("UNION\n");
    }
    print("SELECT DISTINCT usr${times_used}.id, usr${times_used}.usrname, ");
    print("usr${times_used}.create_date as delete_date, '$k' as table_name\n");
    print("FROM actor.usr usr${times_used}\n");
    print("JOIN $k ON ");
    my $table = $1 if ($k =~ /^.*\.(.*)$/);
    my @fields = @{$data{$k}};
    if (@fields > 1) {
        my $f = 0;
        print("(");
        foreach my $field (@fields) {
            if ($f++) {
                print(" OR ");
            }
            print("${table}.${field} = usr${times_used}.id");
        }
        print(")\n");
    } else {
        print("${table}.${fields[0]} = usr${times_used}.id\n");
    }
    print("WHERE usr${times_used}.deleted\n");
    $times_used++;
 }
 print(")\nGROUP BY id, usrname, delete_date\n");
 print("ORDER BY delete_date, id\n");

 sub add_to_data {
    my $table = shift;
    my $field = shift;
    unless (defined $data{$table}) {
        $data{$table} = ();
    }
    push @{$data{$table}}, $field;
 }
	#!/usr/bin/env perl
	use strict;
	use warnings;

	=head1 NAME

	gpcq - Generate a Purge Check Query

	=head1 SYNOPSIS

	C<gpcq actor.usr_purge_data.sql E<gt> find.sql>

	=head1 DESCRIPTION

	Run this script on a dump of the function definition of the
	actor.usr_purge_data function from your Evergreen installation to
	generate a query to find deleted users with unpurged data. You may
	then use the output of this query to purge thosse users' data.

	You may dump the function definition with the following command line,
	adding appropriate connection parameters for your system:

	C<psql -c "select pg_get_functiondef('actor.usr_purge_data'::regproc)" \
	-A -t -o actor.usr_purge_data.sql>

	After that, you may generate the query with a command like in the synopsis,
	or you can pipe the output directly into psql like so:

	C<gpcq actor.usr_purge_data.sql \| psql --csv -o unpurged_users.csv>

	Again, you will need to add any connection parameters as appropriate
	to psql. The use of the C<--csv> option and the output filename are
	suggestions. You can, of course, get the output in any format you
	prefer.

	The generated query will dump the id, usrname, and deletion date of
	the user from the actor.usr table as well as an array of the table
	names where the unpurged data appears for each patron that has been
	deleted but has has some unpurged data remaining.

	The query is generated from the update and delete statements in the
	actor.usr_purge_data function definition. What this function does can
	vary by Evergreen release, so this is why you must dump the function
	defintion to a file in order for gpcq to parse it.

	gpcq takes no options other than the filename of the function definition.

	=head1 AUTHOR

	Jason Stephenson <[email protected]>

	=head1 COPYRIGHT AND LICENSE

	Copyright 2025 C/W MARS, Inc.

	gpcq is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 2 of the License, or
	(at your option) any later version.

	gpcq is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	GNU General Public License for more details.
	You should have received a copy of the GNU General Public License
	along with gpcq. If not, see <http://www.gnu.org/licenses/>.

	=cut

	my %data = ();

	if (open my $fh, "<", $ARGV[0]) {
	my ($table, $field);
	while (my $line = <$fh>) {
	if ($line =~ /DELETE\s+FROM\s+([^[:space:]]+)\s+WHERE\s([^[:space:]]+)\s+=\s+src_usr/) {
	$table = $1;
	$field = $2;
	} elsif ($line =~ /UPDATE\s+([^[:space:]]+)/) {
	$table = $1;
	}
	if ($line =~ /SET\s+([^[:space:]]+)\s+=\s+dest_usr/) {
	$field = $1;
	}
	if ($field) {
	add_to_data($table, $field);
	}
	undef($field);
	}
	close($fh);
	}

	print("SELECT id, usrname, delete_date, array_agg(table_name) as tables\n");
	print("FROM (\n");
	my $times_used = 0;
	foreach my $k (sort keys %data) {
	if ($times_used) {
	print("UNION\n");
	}
	print("SELECT DISTINCT usr${times_used}.id, usr${times_used}.usrname, ");
	print("usr${times_used}.create_date as delete_date, '$k' as table_name\n");
	print("FROM actor.usr usr${times_used}\n");
	print("JOIN $k ON ");
	my $table = $1 if ($k =~ /^.\.(.)$/);
	my @fields = @{$data{$k}};
	if (@fields > 1) {
	my $f = 0;
	print("(");
	foreach my $field (@fields) {
	if ($f++) {
	print(" OR ");
	}
	print("${table}.${field} = usr${times_used}.id");
	}
	print(")\n");
	} else {
	print("${table}.${fields[0]} = usr${times_used}.id\n");
	}
	print("WHERE usr${times_used}.deleted\n");
	$times_used++;
	}
	print(")\nGROUP BY id, usrname, delete_date\n");
	print("ORDER BY delete_date, id\n");

	sub add_to_data {
	my $table = shift;
	my $field = shift;
	unless (defined $data{$table}) {
	$data{$table} = ();
	}
	push @{$data{$table}}, $field;
	}
No results found