Created
March 19, 2015 18:04
-
-
Save IsoLinearCHiP/3d97e7d6623a8d840231 to your computer and use it in GitHub Desktop.
LDIF filter in awk and some companion scripts
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/awk -f
# Make base64-encoded LDIF attribute values ("attr:: <base64>") readable.
#
# A base64 value, including its "^ " continuation lines, is collected and
# printed as one line: the first field of the attribute line (e.g. "cn::"),
# a space, then the decoded text.  Decoded bytes below 32 other than TAB,
# LF and CR are shown as ".".  Every other input line is printed unchanged.
#
# Notes:
#   - \w in the patterns is a gawk regex extension.
#   - NOTE(review): in a multibyte locale gawk's %c may emit values above
#     127 as code points rather than raw bytes; running with LC_ALL=C or
#     gawk -b should avoid that -- confirm for your awk.

# Decode the pending value held in b64, print it after attr, and clear b64.
# The extra parameters are awk's way of declaring local variables, so the
# bit accumulator always starts from zero for each value.
function flush_b64(    i, c, b, o, obc) {
    # %s keeps a "%" inside the attribute name from acting as a directive
    printf "%s ", attr
    o = 0      # byte being assembled
    obc = 0    # number of bits currently in o
    for (i = 1; i <= length(b64); i++) {
        # index() gives alphabet position + 1, or 0 for "=" and any other
        # character outside the alphabet, which is then skipped
        c = index( \
            "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", \
            substr(b64, i, 1) \
        )
        if (c--) {
            # shift the six bits of this character into o, highest bit first
            for (b = 0; b < 6; b++) {
                o = o * 2 + int(c / 32)
                c = (c * 2) % 64
                if (++obc == 8) {
                    if (o > 31 || o == 9 || o == 13 || o == 10) {
                        printf "%c", o
                    } else {
                        printf "."
                    }
                    obc = 0
                    o = 0
                }
            }
        }
    }
    b64 = ""
    print ""
}

# A new attribute line or a blank line ends any pending base64 value.
/^\w+:.+/ || /^$/ { if (b64 != "") flush_b64() }

# Start of a base64 value: remember the attribute and the first chunk.
/^\w+:: .+/ { attr = $1; b64 = $NF }

# Continuation line: append it to the pending base64 value, if any.
/^ .+/ { if (b64 != "") { b64 = b64 $NF } }

# Lines that are not part of a pending base64 value pass through as-is.
{ if (b64 == "") { print } }

# Input may end without a blank line; do not lose the last value.
END { if (b64 != "") flush_b64() }
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/awk -f | |
| # FIXME: currently expects the input to be line-joined already; the ldif_join tool does this. | |
| # It would be better if this script handled standard LDIF line wrapping (continuation lines matching "^ ") itself. | |
| # from http://readthetxt.tumblr.com/post/29038003066/pure-awk-base64-decoder-for-ldif | |
| # adapted to work as a function | |
| # credits to Andrey Domas & Vladimir Kozhukalov | |
# Decode the base64 string b64 and return the decoded text.
#
# Characters outside the base64 alphabet (including "=" padding) are
# skipped.  Decoded bytes below 32 other than TAB (9), LF (10) and CR (13)
# are replaced by "." so the result is safe to print and to match against.
#
# Only b64 is a real argument.  The names after the gap are awk's idiom for
# local variables: without them every call would append to one global
# result and overwrite the caller's loop index i.
#
# NOTE(review): in a multibyte locale gawk's %c may turn values above 127
# into code points rather than raw bytes; LC_ALL=C or gawk -b should avoid
# that -- confirm for your awk.
function de64(b64,    i, c, b, o, obc, result) {
    result = ""
    o = 0      # byte being assembled
    obc = 0    # number of bits currently in o
    for (i = 1; i <= length(b64); i++) {
        # index() gives alphabet position + 1, or 0 when the character is
        # not part of the alphabet
        c = index( \
            "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", \
            substr(b64, i, 1) \
        )
        if (c--) {
            # shift the six bits of this character into o, highest bit first
            for (b = 0; b < 6; b++) {
                o = o * 2 + int(c / 32)
                c = (c * 2) % 64
                if (++obc == 8) {
                    if (o > 31 || o == 9 || o == 13 || o == 10) {
                        result = result sprintf("%c", o)
                    } else {
                        result = result "."
                    }
                    obc = 0
                    o = 0
                }
            }
        }
    }
    return result
}
# from the GNU awk manual
# Print every scalar element of arr as "name[index] = value", one per line,
# descending into subarrays and extending the printed name with each index.
#   arr   array to dump
#   name  label printed in front of the index
#   i     local loop variable; callers do not pass it
function walk_array(arr, name,      i)
{
    for (i in arr) {
        if (!isarray(arr[i])) {
            # scalar leaf: print it under the name built so far
            printf("%s[%s] = %s\n", name, i, arr[i])
            continue
        }
        # subarray: recurse with the index appended to the name
        walk_array(arr[i], (name "[" i "]"))
    }
}
# Set up record parsing and collect the filter expressions given on the
# command line.
#
# Each leading argument of the form <attr><op><value> becomes
# filters[attr]["op"] / filters[attr]["val"]; a later filter on the same
# attribute replaces the earlier one.  Arguments starting with "-" are
# reported as unrecognized options.  The first argument that is neither
# stops the scan; it and everything after it stay in ARGV as input files.
BEGIN {
    #print de64("SGVsbG8gV29ybGQhCg==")

    ## LDIF separates records by a blank line and fields by newlines
    RS = ""
    FS = "\n"
    OFS = "\n"
    ORS = "\n\n"

    # "declare" them as arrays, kinda...?!
    # filters is emptied again so filters[attr] can hold a subarray.
    filters["objectClass"] = ""
    counts["objectClass"] = ""
    delete filters

    # collect filter expressions from the arguments
    for (i = 1; i < ARGC; i++) {
        # The attribute part may not contain operator characters, so the
        # FIRST operator in the argument is the one used and the value may
        # contain anything (e.g. "dn=~cn=a<b").  Two-character operators
        # are listed before their one-character prefixes.
        if (match(ARGV[i], /^([^=!<>~]*)(==|!=|<=|>=|<|>|=~|!~)(.*)$/, query)) {
            attr = query[1]
            op = query[2]
            val = query[3]

            filters[attr]["op"] = op
            filters[attr]["val"] = val
        }
        # else if (ARGV[i] == "-v")
        #     verbose = 1
        # else if (ARGV[i] == "-q")
        #     debug = 1
        else if (ARGV[i] ~ /^-./) {
            e = sprintf("%s: unrecognized option -- %c",
                        ARGV[0], substr(ARGV[i], 2, 1))
            print e > "/dev/stderr"
        # stop at the first argument that is neither an option nor a
        # filter; the rest are considered to be filenames
        } else
            break

        # consumed arguments must not be opened as input files
        delete ARGV[i]
    }
    # for (f in filters) printf( "%s%s%s\n", f, filters[f]["op"], filters[f]["val"])
}
## FIXME currently the version header gets processed like a record, might want special handling
# Process one record: index its attributes, evaluate every filter, and print
# the record unchanged when all filters pass.
#
# Filter semantics per attribute (a record may hold several values):
#   ==  some value equals the filter value
#   !=  no value equals the filter value
#   =~  some value matches the filter value used as a regex
#   !~  no value matches the filter value used as a regex
# If the attribute is absent, == and =~ fail while the other operators pass.
# <, >, <= and >= are accepted on the command line but not implemented; they
# never reject a record.
{
    delete counts
    delete record

    # Build record[attr][n] from the fields, base64-decoding "::" values so
    # filters can look inside them as well.  The loop variable is
    # deliberately not called i, so a helper using a global i cannot
    # disturb it.
    for (fieldno = 1; fieldno <= NF; fieldno++) {
        # skip lines that are not "attr: value" or "attr:: value" instead of
        # filing a stale value under an empty attribute name
        if (!match($fieldno, /([^:]+)(: |:: )(.*)/, fields))
            continue

        attr = fields[1]
        if (fields[2] == ":: ")
            val = de64(fields[3])
        else
            val = fields[3]

        # counts[attr] is the next free index and, afterwards, the number of
        # values stored for attr
        record[attr][counts[attr]++] = val
    }
    # printf "processed %s\n", record["dn"][0]

    printrecord = 1
    for (attr in filters) {
        op = filters[attr]["op"]
        want = filters[attr]["val"]

        if (!(attr in record)) {
            # FIXME if the ordering comparisons are ever implemented this
            # needs to change: only operators starting with "=" require
            # the attribute to be present
            printrecord = printrecord && ! (substr(op, 1, 1) == "=")
            continue
        }

        if (op != "==" && op != "!=" && op != "=~" && op != "!~")
            continue    # ordering operators: not implemented, no effect

        # Walk all stored values by count; testing the value itself would
        # stop early at "0" or an empty value.
        found = 0
        for (idx = 0; idx < counts[attr]; idx++) {
            have = record[attr][idx]
            if (op == "==" || op == "!=")
                hit = (have == want)
            else
                hit = match(have, want)
            if (hit) { found = 1; break }
        }

        # operators starting with "!" pass when nothing was found
        if (substr(op, 1, 1) == "!")
            printrecord = printrecord && ! found
        else
            printrecord = printrecord && found
    }

    # if (printrecord) walk_array(record, "record")
    if (printrecord) print
}
| # the old not so flexible bash hack | |
| ## #!/bin/bash | |
| ## awk 'BEGIN { RS=""; FS="\n"; OFS="\x00" } { $1=$1; print }' | grep -a "$1" "$2" | awk 'BEGIN { FS="\x00"; OFS="\n"; ORS="\n\n" } { $1=$1; print }' | |
| ## exit 0 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sed -nf
# Undo LDIF line wrapping: collect the whole input, then glue every
# continuation line (a line starting with one space) onto the line before it.

# Append each line to the hold space, preceded by a newline.
H
# Nothing more to do until the last line (-n suppresses output anyway).
$!d
# Last line: bring the collected text into the pattern space.
x
# Drop the newline H put in front of the very first line.
s/\n//
# A newline followed by a space marks a wrapped line; remove both.
s/\n //g
p
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Sort the records of an LDIF stream read from stdin and write them to stdout.
# Records are blank-line separated paragraphs; each one is packed onto a
# single NUL-joined line so sort can order whole records, then unpacked.

# Join the lines of every record with NUL bytes, one record per output line.
pack_records() {
    awk 'BEGIN { RS=""; FS="\n"; OFS="\x00" } { $1=$1; print }'
}

# Turn NUL-joined lines back into multi-line records separated by blank lines.
unpack_records() {
    awk 'BEGIN { FS="\x00"; OFS="\n"; ORS="\n\n" } { $1=$1; print }'
}

pack_records |
    sort -t '\0' -k1 |
    unpack_records
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment