Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Select an option

  • Save pierstitus/c310dd8a4cb45370e7a0607d4f5ec7db to your computer and use it in GitHub Desktop.

Select an option

Save pierstitus/c310dd8a4cb45370e7a0607d4f5ec7db to your computer and use it in GitHub Desktop.
Benchmark file size and read/write times for various GeoTiff compression algorithms
"""Benchmark different GeoTiff compression algorithms.

Usage: GTiff_compression_benchmark.py some_geo.tif

Requires the GDAL tools to be present and executable from the command line of
your system.

This script takes a GeoTiff as input and creates copies of it with different
compression algorithms. It measures the file size and the compression and
decompression times of every copy and prints them as a table.

Author:
    Kersten Clauss
    [email protected]
"""
# The docstring has to be the first statement of the file to become the
# module's __doc__; metadata assignments therefore come after it.
__author__ = 'kersten.clauss'
import os
import subprocess
import sys
import tempfile
import time

import pandas
from hurry.filesize import size, si
from osgeo import gdal
def _compression_tests():
    """Return the benchmark cases in the order they are run and reported.

    Every case is a fresh dict holding a file-name friendly ``name``, a
    display ``title`` and the ``gdal_translate`` creation ``options`` (as an
    argument list) that select the compression under test.
    """
    return [
        {'name': 'uncompressed', 'title': "Uncompressed", 'options': []},
        {'name': 'packbits', 'title': "Packbits",
         'options': ['-co', 'COMPRESS=PACKBITS', '-co', 'TILED=YES']},
        {'name': 'deflate_1', 'title': "Deflate pred=1",
         'options': ['-co', 'COMPRESS=DEFLATE', '-co', 'PREDICTOR=1', '-co', 'TILED=YES']},
        {'name': 'deflate_2', 'title': "Deflate pred=2",
         'options': ['-co', 'COMPRESS=DEFLATE', '-co', 'PREDICTOR=2', '-co', 'TILED=YES']},
        {'name': 'deflate_3', 'title': "Deflate pred=3",
         'options': ['-co', 'COMPRESS=DEFLATE', '-co', 'PREDICTOR=3', '-co', 'TILED=YES']},
        {'name': 'lzw_1', 'title': "LZW pred=1",
         'options': ['-co', 'COMPRESS=LZW', '-co', 'PREDICTOR=1', '-co', 'TILED=YES']},
        {'name': 'lzw_2', 'title': "LZW pred=2",
         'options': ['-co', 'COMPRESS=LZW', '-co', 'PREDICTOR=2', '-co', 'TILED=YES']},
        {'name': 'lzw_3', 'title': "LZW pred=3",
         'options': ['-co', 'COMPRESS=LZW', '-co', 'PREDICTOR=3', '-co', 'TILED=YES']},
    ]


def _without_float_predictor(tests):
    """Return the cases of *tests* that do not request ``PREDICTOR=3``."""
    # Build a new list instead of calling remove() while iterating, which
    # silently skips the element that follows every removed one.
    return [test for test in tests if 'PREDICTOR=3' not in test['options']]


def _is_float_raster(path):
    """Return True if the data type name of band 1 of *path* contains 'Float'.

    Raises:
        Exception: if GDAL cannot open *path*.
    """
    dataset = gdal.Open(path)
    # gdal.Open returns None instead of raising unless GDAL exceptions have
    # been enabled, so fail here with a message that names the file.
    if dataset is None:
        raise Exception("Could not open input GeoTiff: " + path)
    type_name = gdal.GetDataTypeName(dataset.GetRasterBand(1).DataType)
    dataset = None  # drop the reference so GDAL can close the file
    return 'Float' in type_name


def _gdal_translate_command(options, src, dst):
    """Return the ``gdal_translate`` argument list converting *src* to *dst*."""
    return ['gdal_translate', '-of', 'GTiff'] + list(options) + [src, dst]


def _run_gdal_translate(options, src, dst):
    """Run ``gdal_translate`` with *options*; raise if it exits non-zero.

    The command is passed as an argument list without a shell, so paths that
    contain spaces or shell metacharacters are handed over unchanged.

    Raises:
        subprocess.CalledProcessError: if ``gdal_translate`` fails.
    """
    subprocess.run(_gdal_translate_command(options, src, dst), check=True)


def main(argv=None):
    """Benchmark the compression variants on the GeoTiff named in *argv*.

    Args:
        argv: command line including the program name; defaults to
            ``sys.argv``. ``argv[1]`` is the path of the input GeoTiff.

    Raises:
        Exception: if no input path is given or the input cannot be opened.
        subprocess.CalledProcessError: if a ``gdal_translate`` call fails.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:  # check if an input file is given
        raise Exception("Missing input GeoTiff.")
    src_img = os.path.abspath(argv[1])

    tests = _compression_tests()
    if not _is_float_raster(src_img):
        print("## Input is not floating point, so skip PREDICTOR=3 ##")
        tests = _without_float_predictor(tests)

    # The scratch directory lives next to the source image and is removed
    # when the block exits, including when one of the steps fails.
    with tempfile.TemporaryDirectory(prefix="tmp", dir=os.path.dirname(src_img)) as tmp_dir:
        # make sure to have uncompressed input
        uncompressed_input = os.path.join(tmp_dir, "input.tif")
        print("## Make sure input is uncompressed ##")
        _run_gdal_translate([], src_img, uncompressed_input)

        # execute and time compression, then record the resulting file size
        print("## Execute and time compressions ##")
        for test in tests:
            test['file'] = os.path.join(tmp_dir, test['name'] + '.tif')
            # perf_counter is monotonic, unlike the wall clock of time.time()
            start_time = time.perf_counter()
            _run_gdal_translate(test['options'], uncompressed_input, test['file'])
            test['write'] = time.perf_counter() - start_time
            test['size'] = size(os.path.getsize(test['file']), system=si)

        # check read times
        print("## Execute and time decompressions ##")
        for test in tests:
            start_time = time.perf_counter()
            img = gdal.Open(test['file']).ReadAsArray()
            img = None  # release the pixel data before the next case is read
            test['read'] = time.perf_counter() - start_time

    # print results: one column per compression, one row per measurement
    print("## Benchmark results ##")
    print(pandas.DataFrame(
        [
            [test['size'] for test in tests],
            [test['write'] for test in tests],
            [test['read'] for test in tests],
        ],
        index=["Size", "Write time", "Read time"],
        columns=[test['title'] for test in tests],
    ))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment