IsoLinearCHiP · March 19, 2015 18:04
diff --git a/ldif_de64.awk b/ldif_de64.awk
 #!/usr/bin/awk -f
 # Filter that replaces base64 attributes ("attr:: <base64>", possibly wrapped
 # onto continuation lines starting with a space) by "attr:: <decoded text>".
 # All other lines are passed through unchanged.
 #
 # Globals shared by the rules below:
 #   attr  first field of the "attr:: ..." line currently being collected
 #   b64   base64 text collected so far ("" when nothing is pending)

 # Decode and print the pending base64 attribute, if there is one.
 # Bytes below 32 other than TAB, LF and CR are printed as ".".
 # Clears b64 afterwards.
 function flush_b64(    pos, sextet, bit, acc, nbits) {
     if (b64 == "")
         return

     # attr comes from the input, so it must not be used as the format string.
     printf "%s ", attr

     acc = 0      # bits collected for the output byte under construction
     nbits = 0    # number of bits currently held in acc
     for (pos = 1; pos <= length(b64); pos++) {
         sextet = index("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", substr(b64, pos, 1))
         if (sextet == 0)
             continue    # "=" padding or any other character outside the alphabet
         sextet--        # index() is 1-based, base64 digit values are 0..63

         # Move the six bits into acc one at a time, most significant first.
         for (bit = 0; bit < 6; bit++) {
             acc = acc * 2 + int(sextet / 32)
             sextet = (sextet * 2) % 64

             if (++nbits == 8) {
                 if (acc > 31 || acc == 9 || acc == 13 || acc == 10)
                     printf "%c", acc
                 else
                     printf "."
                 nbits = 0
                 acc = 0
             }
         }
     }

     b64 = ""
     print ""    # terminate the decoded line
 }

 # A new attribute line or a blank line ends the pending base64 value.
 /^\w+:.+/ || /^$/ { flush_b64() }

 # Start collecting a base64 attribute.
 /^\w+:: .+/ { attr = $1; b64 = $NF }

 # Continuation line: append it to the value being collected.
 /^ .+/ { if (b64 != "") { b64 = b64 $NF } }

 # Pass the line through unless it belongs to a pending base64 value.
 { if (b64 == "") { print } }

 # Input may end without a blank line; do not lose the last attribute.
 END { flush_b64() }
diff --git a/ldif_filter.awk b/ldif_filter.awk
 #!/usr/bin/awk -f

 # FIXME currently expects input to be linejoined, I have a tool ldif_join which does this
 # probably would be good if the script would handle standard LDIF line wrapping with "^ ".

 # from http://readthetxt.tumblr.com/post/29038003066/pure-awk-base64-decoder-for-ldif
 # adapted to work as a function
 # credits to Andrey Domas & Vladimir Kozhukalov
 # de64(b64): return the text encoded by the base64 string b64.
 #
 # Characters outside the base64 alphabet (including "=" padding) are ignored.
 # Decoded bytes below 32 other than TAB (9), LF (10) and CR (13) are replaced
 # by "." so the result is safe to print and to match against.
 #
 # The names after b64 are awk locals; callers pass only b64.  Keeping them
 # local matters: the result must start empty on every call, and the position
 # counter must not overwrite a loop variable of the caller.
 function de64(b64,    result, pos, sextet, bit, acc, nbits) {
   result = ""
   acc = 0      # bits collected for the output byte under construction
   nbits = 0    # number of bits currently held in acc

   for (pos = 1; pos <= length(b64); pos++) {
     sextet = index("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", substr(b64, pos, 1))
     if (sextet == 0)
       continue    # not a base64 digit
     sextet--      # index() is 1-based, digit values are 0..63

     # Move the six bits into acc one at a time, most significant first.
     for (bit = 0; bit < 6; bit++) {
       acc = acc * 2 + int(sextet / 32)
       sextet = (sextet * 2) % 64

       if (++nbits == 8) {
         if (acc > 31 || acc == 9 || acc == 13 || acc == 10)
           result = result sprintf("%c", acc)
         else
           result = result "."
         nbits = 0
         acc = 0
       }
     }
   }
   return result
 }

 # from the GNU awk manual
 # walk_array(arr, name): print every scalar element of arr, descending into
 # subarrays.  Each element is printed as "name[index] = value", where name
 # grows by one "[index]" per nesting level.  key is a local variable.
 function walk_array(arr, name,    key) {
     for (key in arr) {
         if (!isarray(arr[key])) {
             printf("%s[%s] = %s\n", name, key, arr[key])
             continue
         }
         walk_array(arr[key], (name "[" key "]"))
     }
 }

 # Set up record parsing and collect the filter expressions.
 #
 # Leading command-line arguments of the form <attr><op><value> are stored as
 # filters[attr]["op"] and filters[attr]["val"] and removed from ARGV so awk
 # does not treat them as input files.  Recognised operators are
 # == != =~ !~ < > <= >= (the record rule currently ignores the last four).
 # Scanning stops at the first argument that is neither a filter nor an
 # option; it and everything after it are left in ARGV as file names.
 BEGIN {
   #print de64("SGVsbG8gV29ybGQhCg==")

   ## LDIF separates records by a blank line and fields by newlines
   RS = ""
   FS = "\n"
   OFS = "\n"
   ORS = "\n\n"

   # Make gawk treat both names as arrays before any rule touches them.
   filters["objectClass"] = ""
   counts["objectClass"] = ""
   delete filters

   for (argi = 1; argi < ARGC; argi++) {
     # The attribute name is anchored at the start and may not contain
     # operator characters, so the FIRST operator in the argument splits it
     # and the value may itself contain "=", "<", "~", ...  Two-character
     # operators are listed before "<" and ">" so "<=" is never read as "<".
     if (match(ARGV[argi], /^([^=!<>~]+)(==|!=|<=|>=|=~|!~|<|>)(.*)$/, query)) {
       filters[query[1]]["op"] = query[2]
       filters[query[1]]["val"] = query[3]
     }
     # else if (ARGV[argi] == "-v")
     #   verbose = 1
     # else if (ARGV[argi] == "-q")
     #   debug = 1
     else if (ARGV[argi] ~ /^-./) {
       # printf instead of print: ORS is "\n\n" here and would add a blank
       # line after the message.
       printf("%s: unrecognized option -- %c\n", ARGV[0], substr(ARGV[argi], 2, 1)) > "/dev/stderr"
     } else {
       # first non-option, non-filter argument: the rest are file names
       break
     }
     delete ARGV[argi]
   }

   # for (f in filters) printf( "%s%s%s\n", f, filters[f]["op"], filters[f]["val"])
 }

 ## FIXME currently the version header gets processed like a record, might want special handling

 # Process one record: index its attributes, then print the record when it
 # passes every filter collected in BEGIN.
 #
 # Filter semantics per attribute (an attribute may have several values):
 #   ==  some value equals the filter value (string comparison)
 #   !=  no value equals the filter value
 #   =~  some value matches the filter value used as a regular expression
 #   !~  no value matches it
 # A record lacking the attribute fails "==" and "=~" and passes the others.
 {
   # print "NF=" NF
   delete counts
   delete record

   # Build record[attr][n] = n-th value of attr, decoding base64 values
   # ("attr:: ...") so filters can look inside them.  The index has its own
   # name because functions called from this loop may use the global i.
   for (fld = 1; fld <= NF; fld++) {
     # Skip lines that are not "attr: value" or "attr:: base64" (comments,
     # URL values, ...) rather than storing them under an empty name with
     # the previous line's value.
     if (!match($fld, /^([^:]+)(: |:: )(.*)/, fields))
       continue

     attr = fields[1]
     if (fields[2] == ":: ")
       val = de64(fields[3])
     else
       val = fields[3]

     record[attr][counts[attr]++] = val
   }

   # printf "processed %s\n", record["dn"][0]

   printrecord = 1
   for (attr in filters) {
     op = filters[attr]["op"]
     val = filters[attr]["val"]

     if (!(attr in record)) {
       # FIXME if the ordering comparisons are ever implemented this needs
       # to be changed
       if (substr(op, 1, 1) == "=")
         printrecord = 0
       continue
     }

     isregex = (op == "=~" || op == "!~")
     if (!isregex && op != "==" && op != "!=")
       continue    # <, >, <=, >= are not implemented and never reject

     # Walk all stored values by count; testing the value itself would stop
     # at the first empty or "0" value.
     found = 0
     for (n = 0; n < counts[attr]; n++) {
       candidate = record[attr][n]
       # Appending "" forces a string comparison, so "0123" != "123".
       if (isregex ? match(candidate, val) : ((candidate "") == (val ""))) {
         found = 1
         break
       }
     }

     # "!=" and "!~" pass when nothing was found.
     if (substr(op, 1, 1) == "!")
       found = !found
     if (!found)
       printrecord = 0
   }
   # if (printrecord) walk_array(record, "record")
   if (printrecord) print

 }

 # the old not so flexible bash hack
 ## #!/bin/bash
 ## awk 'BEGIN { RS=""; FS="\n"; OFS="\x00" } { $1=$1; print }' | grep -a "$1" "$2" | awk 'BEGIN { FS="\x00"; OFS="\n"; ORS="\n\n" } { $1=$1; print }'
 ## exit 0
diff --git a/ldif_join.sed b/ldif_join.sed
 #!/bin/sed -nf 
 # Append every input line to the hold space (each preceded by a newline).
 H
 # On the last line, work on the collected text as a whole.
 $ {
   # bring the collected text into the pattern space
   x
   # drop the newline H put in front of the first line
   s/\n//
   # join continuation lines: remove each newline followed by a space
   s/\n //g
   p
 }
diff --git a/ldif_sortbydn.sh b/ldif_sortbydn.sh
 #!/bin/bash
 # Stage 1: fold each blank-line-separated record onto one line, joining its
 #          lines with NUL bytes.
 # Stage 2: sort those lines; the key starts at the first NUL-delimited field
 #          and runs to the end of the line.
 # Stage 3: turn the NULs back into newlines and separate records with a
 #          blank line.
 awk 'BEGIN { RS=""; FS="\n"; OFS="\x00" } { $1=$1; print }' |
   sort -t '\0' -k1 |
   awk 'BEGIN { FS="\x00"; OFS="\n"; ORS="\n\n" } { $1=$1; print }'
	#!/usr/bin/awk -f
	# Filter that replaces base64 attributes ("attr:: <base64>", possibly wrapped
	# onto continuation lines starting with a space) by "attr:: <decoded text>".
	# All other lines are passed through unchanged.
	#
	# Globals shared by the rules below:
	#   attr  first field of the "attr:: ..." line currently being collected
	#   b64   base64 text collected so far ("" when nothing is pending)

	# Decode and print the pending base64 attribute, if there is one.
	# Bytes below 32 other than TAB, LF and CR are printed as ".".
	# Clears b64 afterwards.
	function flush_b64(    pos, sextet, bit, acc, nbits) {
	    if (b64 == "")
	        return

	    # attr comes from the input, so it must not be used as the format string.
	    printf "%s ", attr

	    acc = 0      # bits collected for the output byte under construction
	    nbits = 0    # number of bits currently held in acc
	    for (pos = 1; pos <= length(b64); pos++) {
	        sextet = index("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", substr(b64, pos, 1))
	        if (sextet == 0)
	            continue    # "=" padding or any other character outside the alphabet
	        sextet--        # index() is 1-based, base64 digit values are 0..63

	        # Move the six bits into acc one at a time, most significant first.
	        for (bit = 0; bit < 6; bit++) {
	            acc = acc * 2 + int(sextet / 32)
	            sextet = (sextet * 2) % 64

	            if (++nbits == 8) {
	                if (acc > 31 || acc == 9 || acc == 13 || acc == 10)
	                    printf "%c", acc
	                else
	                    printf "."
	                nbits = 0
	                acc = 0
	            }
	        }
	    }

	    b64 = ""
	    print ""    # terminate the decoded line
	}

	# A new attribute line or a blank line ends the pending base64 value.
	/^\w+:.+/ || /^$/ { flush_b64() }

	# Start collecting a base64 attribute.
	/^\w+:: .+/ { attr = $1; b64 = $NF }

	# Continuation line: append it to the value being collected.
	/^ .+/ { if (b64 != "") { b64 = b64 $NF } }

	# Pass the line through unless it belongs to a pending base64 value.
	{ if (b64 == "") { print } }

	# Input may end without a blank line; do not lose the last attribute.
	END { flush_b64() }
	#!/usr/bin/awk -f

	# FIXME currently expects input to be linejoined, I have a tool ldif_join which does this
	# probably would be good if the script would handle standard LDIF line wrapping with "^ ".

	# from http://readthetxt.tumblr.com/post/29038003066/pure-awk-base64-decoder-for-ldif
	# adapted to work as a function
	# credits to Andrey Domas & Vladimir Kozhukalov
	# de64(b64): return the text encoded by the base64 string b64.
	#
	# Characters outside the base64 alphabet (including "=" padding) are ignored.
	# Decoded bytes below 32 other than TAB (9), LF (10) and CR (13) are replaced
	# by "." so the result is safe to print and to match against.
	#
	# The names after b64 are awk locals; callers pass only b64.  Keeping them
	# local matters: the result must start empty on every call, and the position
	# counter must not overwrite a loop variable of the caller.
	function de64(b64,    result, pos, sextet, bit, acc, nbits) {
	  result = ""
	  acc = 0      # bits collected for the output byte under construction
	  nbits = 0    # number of bits currently held in acc

	  for (pos = 1; pos <= length(b64); pos++) {
	    sextet = index("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", substr(b64, pos, 1))
	    if (sextet == 0)
	      continue    # not a base64 digit
	    sextet--      # index() is 1-based, digit values are 0..63

	    # Move the six bits into acc one at a time, most significant first.
	    for (bit = 0; bit < 6; bit++) {
	      acc = acc * 2 + int(sextet / 32)
	      sextet = (sextet * 2) % 64

	      if (++nbits == 8) {
	        if (acc > 31 || acc == 9 || acc == 13 || acc == 10)
	          result = result sprintf("%c", acc)
	        else
	          result = result "."
	        nbits = 0
	        acc = 0
	      }
	    }
	  }
	  return result
	}

	# from the GNU awk manual
	# walk_array(arr, name): print every scalar element of arr, descending into
	# subarrays.  Each element is printed as "name[index] = value", where name
	# grows by one "[index]" per nesting level.  key is a local variable.
	function walk_array(arr, name,    key) {
	    for (key in arr) {
	        if (!isarray(arr[key])) {
	            printf("%s[%s] = %s\n", name, key, arr[key])
	            continue
	        }
	        walk_array(arr[key], (name "[" key "]"))
	    }
	}

	# Set up record parsing and collect the filter expressions.
	#
	# Leading command-line arguments of the form <attr><op><value> are stored as
	# filters[attr]["op"] and filters[attr]["val"] and removed from ARGV so awk
	# does not treat them as input files.  Recognised operators are
	# == != =~ !~ < > <= >= (the record rule currently ignores the last four).
	# Scanning stops at the first argument that is neither a filter nor an
	# option; it and everything after it are left in ARGV as file names.
	BEGIN {
	  #print de64("SGVsbG8gV29ybGQhCg==")

	  ## LDIF separates records by a blank line and fields by newlines
	  RS = ""
	  FS = "\n"
	  OFS = "\n"
	  ORS = "\n\n"

	  # Make gawk treat both names as arrays before any rule touches them.
	  filters["objectClass"] = ""
	  counts["objectClass"] = ""
	  delete filters

	  for (argi = 1; argi < ARGC; argi++) {
	    # The attribute name is anchored at the start and may not contain
	    # operator characters, so the FIRST operator in the argument splits it
	    # and the value may itself contain "=", "<", "~", ...  Two-character
	    # operators are listed before "<" and ">" so "<=" is never read as "<".
	    if (match(ARGV[argi], /^([^=!<>~]+)(==|!=|<=|>=|=~|!~|<|>)(.*)$/, query)) {
	      filters[query[1]]["op"] = query[2]
	      filters[query[1]]["val"] = query[3]
	    }
	    # else if (ARGV[argi] == "-v")
	    #   verbose = 1
	    # else if (ARGV[argi] == "-q")
	    #   debug = 1
	    else if (ARGV[argi] ~ /^-./) {
	      # printf instead of print: ORS is "\n\n" here and would add a blank
	      # line after the message.
	      printf("%s: unrecognized option -- %c\n", ARGV[0], substr(ARGV[argi], 2, 1)) > "/dev/stderr"
	    } else {
	      # first non-option, non-filter argument: the rest are file names
	      break
	    }
	    delete ARGV[argi]
	  }

	  # for (f in filters) printf( "%s%s%s\n", f, filters[f]["op"], filters[f]["val"])
	}

	## FIXME currently the version header gets processed like a record, might want special handling

	# Process one record: index its attributes, then print the record when it
	# passes every filter collected in BEGIN.
	#
	# Filter semantics per attribute (an attribute may have several values):
	#   ==  some value equals the filter value (string comparison)
	#   !=  no value equals the filter value
	#   =~  some value matches the filter value used as a regular expression
	#   !~  no value matches it
	# A record lacking the attribute fails "==" and "=~" and passes the others.
	{
	  # print "NF=" NF
	  delete counts
	  delete record

	  # Build record[attr][n] = n-th value of attr, decoding base64 values
	  # ("attr:: ...") so filters can look inside them.  The index has its own
	  # name because functions called from this loop may use the global i.
	  for (fld = 1; fld <= NF; fld++) {
	    # Skip lines that are not "attr: value" or "attr:: base64" (comments,
	    # URL values, ...) rather than storing them under an empty name with
	    # the previous line's value.
	    if (!match($fld, /^([^:]+)(: |:: )(.*)/, fields))
	      continue

	    attr = fields[1]
	    if (fields[2] == ":: ")
	      val = de64(fields[3])
	    else
	      val = fields[3]

	    record[attr][counts[attr]++] = val
	  }

	  # printf "processed %s\n", record["dn"][0]

	  printrecord = 1
	  for (attr in filters) {
	    op = filters[attr]["op"]
	    val = filters[attr]["val"]

	    if (!(attr in record)) {
	      # FIXME if the ordering comparisons are ever implemented this needs
	      # to be changed
	      if (substr(op, 1, 1) == "=")
	        printrecord = 0
	      continue
	    }

	    isregex = (op == "=~" || op == "!~")
	    if (!isregex && op != "==" && op != "!=")
	      continue    # <, >, <=, >= are not implemented and never reject

	    # Walk all stored values by count; testing the value itself would stop
	    # at the first empty or "0" value.
	    found = 0
	    for (n = 0; n < counts[attr]; n++) {
	      candidate = record[attr][n]
	      # Appending "" forces a string comparison, so "0123" != "123".
	      if (isregex ? match(candidate, val) : ((candidate "") == (val ""))) {
	        found = 1
	        break
	      }
	    }

	    # "!=" and "!~" pass when nothing was found.
	    if (substr(op, 1, 1) == "!")
	      found = !found
	    if (!found)
	      printrecord = 0
	  }
	  # if (printrecord) walk_array(record, "record")
	  if (printrecord) print

	}

	# the old not so flexible bash hack
	## #!/bin/bash
	## awk 'BEGIN { RS=""; FS="\n"; OFS="\x00" } { $1=$1; print }' \| grep -a "$1" "$2" \| awk 'BEGIN { FS="\x00"; OFS="\n"; ORS="\n\n" } { $1=$1; print }'
	## exit 0
	#!/bin/bash
	# Stage 1: fold each blank-line-separated record onto one line, joining its
	#          lines with NUL bytes.
	# Stage 2: sort those lines; the key starts at the first NUL-delimited field
	#          and runs to the end of the line.
	# Stage 3: turn the NULs back into newlines and separate records with a
	#          blank line.
	# The stages are joined with real pipes; a backslash-escaped "|" would be
	# handed to awk as a literal argument instead of creating a pipeline.
	awk 'BEGIN { RS=""; FS="\n"; OFS="\x00" } { $1=$1; print }' |
	  sort -t '\0' -k1 |
	  awk 'BEGIN { FS="\x00"; OFS="\n"; ORS="\n\n" } { $1=$1; print }'