Skip to content

Instantly share code, notes, and snippets.

@IsoLinearCHiP
Created March 19, 2015 18:04
Show Gist options
  • Select an option

  • Save IsoLinearCHiP/3d97e7d6623a8d840231 to your computer and use it in GitHub Desktop.

Select an option

Save IsoLinearCHiP/3d97e7d6623a8d840231 to your computer and use it in GitHub Desktop.
LDIF filter in awk and some companion scripts
#!/usr/bin/awk -f
# Make base64-encoded LDIF attributes ("attr:: <base64>") readable.
#
# Every other line is passed through unchanged.  A base64 attribute, together
# with its folded continuation lines (lines starting with a single space), is
# collected and then printed as "attr:: <decoded text>" once the next
# attribute line, a blank line, or the end of input is reached.  Decoded bytes
# below 32 other than TAB, LF and CR are shown as ".".
#
# State shared between the rules:
#   attr - first field of the pending base64 attribute line (e.g. "cn::")
#   b64  - base64 text collected so far; "" means nothing is pending

# Decode and print the pending base64 attribute, then clear it.
# The names after the gap in the parameter list are locals (awk idiom).
function flush_b64(    pos, sextet, bit, octet, nbits) {
    # Compare as a string: a numeric-looking value such as "0000" is valid
    # base64 but would be false in a plain truth test.
    if (b64 == "")
        return

    # attr is data, never use it as the printf format string.
    printf "%s ", attr

    # Start every value with an empty bit accumulator so leftover padding
    # bits of the previous value cannot leak into this one.
    octet = 0
    nbits = 0
    for (pos = 1; pos <= length(b64); pos++) {
        sextet = index( \
            "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", \
            substr(b64, pos, 1) \
        )
        # index() is 1-based and returns 0 for characters outside the
        # alphabet (e.g. "=" padding); those are skipped.
        if (sextet--) {
            # Shift the six bits of this character, MSB first, into octet.
            for (bit = 0; bit < 6; bit++) {
                octet = octet * 2 + int(sextet / 32)
                sextet = (sextet * 2) % 64
                if (++nbits == 8) {
                    if (octet > 31 || octet == 9 || octet == 13 || octet == 10) {
                        printf "%c", octet
                    } else {
                        printf "."
                    }
                    nbits = 0
                    octet = 0
                }
            }
        }
    }
    # Terminate the decoded line.
    print ""
    b64 = ""
}

# A new attribute line or a blank line ends any pending base64 value.
/^\w+:.+/ || /^$/ { flush_b64() }

# Start collecting a base64 attribute.
/^\w+:: .+/ { attr = $1; b64 = $NF }

# Folded continuation line: append to the pending base64 value, if any.
/^ .+/ { if (b64 != "") { b64 = b64 $NF } }

# Pass the line through unless it is part of a pending base64 value.
{ if (b64 == "") { print } }

# Do not lose a base64 value that ends exactly at the end of the input.
END { flush_b64() }
#!/usr/bin/awk -f
# FIXME currently expects input to be linejoined, I have a tool ldif_join which does this
# probably would be good if the script would handle standard LDIF line wrapping with "^ ".
# from http://readthetxt.tumblr.com/post/29038003066/pure-awk-base64-decoder-for-ldif
# adapted to work as a function
# credits to Andrey Domas & Vladimir Kozhukalov
# Decode a base64 string into text.
#
#   b64     - the base64 text; characters outside the base64 alphabet
#             (including "=" padding) are skipped
#   returns - the decoded text, with every byte below 32 other than TAB (9),
#             LF (10) and CR (13) replaced by "."
#
# The names after the gap in the parameter list are locals (awk idiom).  They
# must be local: as globals, `result` would keep growing across calls and the
# loop counter would overwrite a caller's variable of the same name.
function de64(b64,    i, c, b, o, obc, result) {
    result = ""
    o = 0      # bits of the output byte being assembled
    obc = 0    # number of bits currently held in o
    for (i = 1; i <= length(b64); i++) {
        c = index( \
            "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", \
            substr(b64, i, 1) \
        )
        # index() is 1-based and returns 0 when the character is not in the
        # alphabet; after the decrement c is the 6-bit value 0..63.
        if (c--) {
            # Shift the six bits, most significant first, into o.
            for (b = 0; b < 6; b++) {
                o = o * 2 + int(c / 32)
                c = (c * 2) % 64
                if (++obc == 8) {
                    if (o > 31 || o == 9 || o == 13 || o == 10) {
                        result = result sprintf("%c", o)
                    } else {
                        result = result "."
                    }
                    obc = 0
                    o = 0
                }
            }
        }
    }
    return result
}
# from the GNU awk manual
# Debug helper: print every scalar element of a (possibly nested) array as
# "name[sub1][sub2] = value", one per line.
#   arr  - the array to dump
#   name - label printed in front of the subscripts
#   key  - local loop variable; callers do not pass it
function walk_array(arr, name, key)
{
    for (key in arr) {
        if (!isarray(arr[key])) {
            printf("%s[%s] = %s\n", name, key, arr[key])
            continue
        }
        # Sub-array: descend, extending the label with this subscript.
        walk_array(arr[key], name "[" key "]")
    }
}
# Set up record parsing and turn leading command-line arguments of the form
# <attr><op><value> into entries of the global `filters` array:
#   filters[attr]["op"]  - the operator text
#   filters[attr]["val"] - the text after the operator
# Consumed arguments are removed from ARGV so awk does not treat them as
# input files.
BEGIN {
    #print de64("SGVsbG8gV29ybGQhCg==")

    ## LDIF separates records by a blank line and fields by newlines
    RS = ""
    FS = "\n"
    OFS = "\n"
    ORS = "\n\n"

    # Touch the names with a subscript so they are known to be arrays even
    # when no filter is given; the dummy element of filters is removed again.
    filters["objectClass"] = ""
    counts["objectClass"] = ""
    delete filters

    # collect filter expressions from the arguments
    for (i = 1; i < ARGC; i++) {
        # Anchored, and the attribute part may not contain operator
        # characters: the split therefore happens at the FIRST operator, so a
        # value that itself contains "==", "=~", ... stays intact
        # (e.g. "cn=~a==b" is attr "cn", op "=~", val "a==b").
        # Two-character operators are listed before their one-character
        # prefixes.
        if (match(ARGV[i], /^([^=!<>~]+)(==|!=|<=|>=|=~|!~|<|>)(.*)$/, query)) {
            attr = query[1]
            op = query[2]
            val = query[3]
            filters[attr]["op"] = op
            filters[attr]["val"] = val
        }
        # else if (ARGV[i] == "-v")
        #     verbose = 1
        # else if (ARGV[i] == "-q")
        #     debug = 1
        else if (ARGV[i] ~ /^-./) {
            e = sprintf("%s: unrecognized option -- %c",
                        ARGV[0], substr(ARGV[i], 2, 1))
            print e > "/dev/stderr"
        # stop at the first argument that is neither an option nor a filter;
        # the rest is considered to be filenames
        } else
            break
        delete ARGV[i]
    }
    # for (f in filters) printf( "%s%s%s\n", f, filters[f]["op"], filters[f]["val"])
}
## FIXME currently the version header gets processed like a record, might want special handling
# process a record
# Runs once per LDIF record (RS="" / FS="\n": one field per line).
# Parses the record into record[attr][n] = value, decoding base64 values, and
# prints the record unchanged when it satisfies every entry of `filters`.
{
    # print "NF=" NF
    delete counts
    delete record

    # Construct an array representation of the record, decoding base64 values
    # so filters can search inside them as well.
    # The loop variable is deliberately not called "i": awk variables are
    # global unless declared as function parameters, so a helper that loops
    # with a global "i" would cut this loop short or restart it.
    for (fld = 1; fld <= NF; fld++) {
        # Skip lines that are not "attr: value" / "attr:: base64" instead of
        # filing the previous line's value under an empty attribute name.
        if (!match($fld, /([^:]+)(: |:: )(.*)/, fields))
            continue
        attr = fields[1]
        if (fields[2] == ":: ")
            val = de64(fields[3])
        else
            val = fields[3]
        # counts[attr] is the number of values stored so far for attr.
        record[attr][counts[attr]++] = val
    }
    # printf "processed %s\n", record["dn"][0]

    printrecord = 1
    for (attr in filters) {
        op = filters[attr]["op"]
        val = filters[attr]["val"]

        if (!(attr in record)) {
            # Attribute absent: positive tests ("==", "=~") fail, negated
            # ones pass.
            # FIXME if the other comparisons are ever implemented this needs
            # to be changed
            printrecord = printrecord && ! (substr(op, 1, 1) == "=")
            continue
        }

        # Only these four operators are implemented; any other operator
        # leaves the decision untouched.
        if (op != "==" && op != "!=" && op != "=~" && op != "!~")
            continue

        # Does any value of the attribute equal / match val?
        # Iterate by the stored count, not by truthiness of the value, so an
        # empty or "0" value does not hide the values after it.
        found = 0
        for (k = 0; k < counts[attr]; k++) {
            if (op == "==" || op == "!=") {
                if (record[attr][k] == val) { found = 1; break }
            } else {
                if (match(record[attr][k], val)) { found = 1; break }
            }
        }

        if (op == "==" || op == "=~")
            printrecord = printrecord && found
        else
            printrecord = printrecord && ! found
    }
    # if (printrecord) walk_array(record, "record")
    if (printrecord) print
}
# the old not so flexible bash hack
## #!/bin/bash
## awk 'BEGIN { RS=""; FS="\n"; OFS="\x00" } { $1=$1; print }' | grep -a "$1" "$2" | awk 'BEGIN { FS="\x00"; OFS="\n"; ORS="\n\n" } { $1=$1; print }'
## exit 0
#!/bin/sed -nf
# Undo LDIF line folding: collect the whole input, then remove every
# "newline + single space" sequence and print the result once.

# Append each input line to the hold space (adds a leading newline).
H

# On the last line, process everything collected so far.
$ {
    # Bring the collected text into the pattern space.
    x
    # Drop the newline that the first H put in front of the text.
    s/\n//
    # Join folded continuation lines onto the line they belong to.
    s/\n //g
    p
}
#!/bin/bash
# Sort the records of an LDIF stream (stdin to stdout).
#
# Stage 1: read blank-line separated records and emit each as ONE line, with
#          the record's lines joined by a NUL byte.
# Stage 2: sort those one-line records.
# Stage 3: turn the NUL bytes back into newlines and separate the records by
#          a blank line again.
awk 'BEGIN { RS=""; FS="\n"; OFS="\x00" } { $1=$1; print }' |
    sort -t '\0' -k1 |
    awk 'BEGIN { FS="\x00"; OFS="\n"; ORS="\n\n" } { $1=$1; print }'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment