process postgres aligned output files
this files are created by using \a to enable aligned output and \o filename
\a \o output.csv select * from my_table;
| *.csv | |
| *.sql |
process postgres aligned output files
this files are created by using \a to enable aligned output and \o filename
\a \o output.csv select * from my_table;
| #!/usr/bin/perl | |
| use strict; | |
| use Data::Dumper; | |
| $Data::Dumper::Indent= 1; | |
| use Util::Matrix; | |
| my $fnm1= shift (@ARGV); | |
| my $fnm2= shift (@ARGV); | |
| my $tbl1= pga::read_table ($fnm1); | |
| $tbl1->mk_idx(0); | |
| my $tbl2= pga::read_table ($fnm2); | |
| $tbl2->mk_idx(0); | |
| my $tbl_x= pga::read_table ('sj_dump.csv'); | |
| my $tbl_y= $tbl_x->filter(2, 1251); | |
| my $idx_y= $tbl_y->mk_idx(0); | |
| my $missing= pga::diff ($tbl1, $tbl2); | |
| my @column_names= @{$tbl1->{'columns'}}; | |
| # print "columns: ", Dumper (\@column_names); | |
| # print "missing: ", Dumper ($missing); | |
| push (@column_names, 'hit'); | |
| my $cn_hit= $#column_names; | |
| my @out_rows; | |
| foreach my $row (@$missing) | |
| { | |
| # print "row: ", Dumper ($row); | |
| $row->[$cn_hit]= ''; | |
| if (exists ($idx_y->{$row->[4]})) | |
| { | |
| $row->[$cn_hit]= 'name'; | |
| push (@out_rows, $row); | |
| } | |
| } | |
| # Util::Matrix::print(\@column_names, $missing); | |
| Util::Matrix::print(\@column_names, \@out_rows); | |
| exit (0); | |
| sub x1 | |
| { | |
| my $fnm1= 'sj_current.csv'; | |
| my $fnm2= 'sj_dump.csv'; | |
| my $tbl1= pga::read_table ($fnm1); | |
| $tbl1->mk_idx(0); | |
| # print "tbl1: ", Dumper ($tbl1); | |
| my $tbl2= pga::read_table ($fnm2); | |
| $tbl2->mk_idx(0); | |
| my $missing= pga::diff ($tbl1, $tbl2); | |
| # print "columns: ", Dumper ($tbl1->{'columns'}); | |
| # print "missing: ", Dumper ($missing); | |
| check_1251 ($missing); | |
| write_inserts ('SchlagwortJelinek', $tbl1->{'columns'}, $missing); | |
| } | |
| sub write_inserts | |
| { | |
| my $tbl_name= shift; | |
| my $col_names= shift; | |
| my $rows= shift; | |
| my $fnm_sql= 'insert_missing.sql'; | |
| open (SQL, '>:utf8', $fnm_sql) or die; | |
| my @col_names= @$col_names; | |
| my $col_count= @col_names; | |
| my $cnt= 0; | |
| foreach my $row (@$rows) | |
| { | |
| my @col; | |
| my @val; | |
| for (my $i= 0; $i < $col_count; $i++) | |
| { | |
| if (defined ($row->[$i]) && $row->[$i] ne '') | |
| { | |
| push (@col, $col_names[$i]); | |
| push (@val, $row->[$i]); | |
| } | |
| } | |
| print SQL "INSERT INTO \"$tbl_name\" (", | |
| join (',', map { $_ } @col), | |
| ") VALUES (", | |
| join (',', map { "'".$_."'" } @val),");\n"; | |
| $cnt++; | |
| } | |
| close (SQL); | |
| print "written $cnt rows to $fnm_sql\n"; | |
| $cnt; | |
| } | |
| =head2 check_1251 | |
| find out if either column 0 or column 2 have the value 1251; | |
| complain otherwise | |
| =cut | |
| sub check_1251 | |
| { | |
| my $rows= shift; | |
| foreach my $row (@$rows) | |
| { | |
| next if ($row->[0] == 1251 || $row->[2] == 1251); | |
| print "check row: ", Dumper($row); | |
| } | |
| } | |
| package pga; | |
| # postgres aligned output | |
| sub mk_idx | |
| { | |
| my $tbl= shift; | |
| my $col= shift; | |
| my $idx; | |
| foreach my $row (@{$tbl->{rows}}) | |
| { | |
| # print "row: ", main::Dumper ($row); | |
| my $v= $row->[$col]; | |
| push (@{$idx->{$v}}, $row); | |
| } | |
| $tbl->{index}= $idx; | |
| } | |
| =head1 filter($table, $column_number, $value) | |
| return a new table consisting only of those rows from $table that contain the given $value in given $column_number. | |
| =cut | |
| sub filter | |
| { | |
| my $tbl= shift; | |
| my $col_num= shift; | |
| my $val= shift; | |
| my @out_rows; | |
| foreach my $row (@{$tbl->{rows}}) | |
| { | |
| push (@out_rows, $row) if ($row->[$col_num] == $val); | |
| } | |
| my $res= { columns => $tbl->{columns}, rows => \@out_rows }; | |
| bless ($res); | |
| $res; | |
| } | |
| =head2 diff ($tbl1, $tbl2) | |
| look for items from $tbl2 which are not present in $tbl1 | |
| =cut | |
| sub diff | |
| { | |
| my $tbl1= shift; | |
| my $tbl2= shift; | |
| my $idx1= $tbl1->{index}; | |
| my $idx2= $tbl2->{index}; | |
| my @missing; | |
| ROW2: foreach my $id (sort { $a <=> $b } keys %$idx2) | |
| { | |
| if (exists ($idx1->{$id})) | |
| { | |
| # TODO: check if both rows are identical or report otherwise; | |
| next ROW2; | |
| } | |
| push (@missing, @{$idx2->{$id}}); | |
| } | |
| (wantarray) ? @missing : \@missing; | |
| } | |
| sub read_table | |
| { | |
| my $fnm= shift; | |
| unless (open (FI, '<:utf8', $fnm)) | |
| { | |
| print "cant read $fnm\n"; | |
| return undef; | |
| } | |
| my @lines= <FI>; | |
| close (FI); | |
| # TODO: check for empty files or other stuff... | |
| chop (@lines); | |
| my $columns= shift (@lines); | |
| my $rows= pop (@lines); | |
| my @columns= split(/\|/, $columns); | |
| # print "columns: ", main::Dumper(\@columns); | |
| print "rows=[$rows]\n"; # TODO: check if $rows is | |
| push (@lines, $rows) unless ($rows =~ m#^\(\d+ rows\)$#); | |
| my @rows; | |
| my $res= | |
| { | |
| 'columns' => \@columns, | |
| 'rows' => \@rows, | |
| }; | |
| bless ($res); | |
| foreach my $line (@lines) | |
| { | |
| my @l= split (/\|/, $line); | |
| push (@rows, \@l); | |
| } | |
| $res; | |
| } |