-
-
Save pierstitus/c310dd8a4cb45370e7a0607d4f5ec7db to your computer and use it in GitHub Desktop.
Benchmark file size and read/write times for various GeoTiff compression algorithms
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| __author__ = 'kersten.clauss' | |
| """Benchmark different GeoTiff compression algorithms. | |
| Usage: GTiff_compression_benchmark.py some_geo.tif | |
| Requires the GDAL tools to be present and executable from the command line of your system. | |
| This script will take a GeoTiff as input and create copies with different compression algorithms from it. | |
| It measures the file size and the compression and decompression times, and prints them as a table. | |
| Author: | |
| Kersten Clauss | |
| [email protected] | |
| """ | |
| import os | |
| import sys | |
| import time | |
| from hurry.filesize import size, si | |
| from osgeo import gdal | |
| import pandas | |
def build_tests():
    """Return the benchmark cases in the order they are run and reported.

    Each case is a dict with a file-friendly ``name``, a ``title`` used as the
    column header of the result table, and the GTiff creation ``options`` that
    are handed to gdal_translate through ``-co``.
    """
    tests = [
        {'name': 'uncompressed', 'title': "Uncompressed", 'options': []},
        {'name': 'packbits', 'title': "Packbits",
         'options': ['COMPRESS=PACKBITS', 'TILED=YES']},
    ]
    for algorithm, label in (('DEFLATE', "Deflate"), ('LZW', "LZW")):
        for predictor in (1, 2, 3):
            tests.append({
                'name': '%s_%d' % (algorithm.lower(), predictor),
                'title': "%s pred=%d" % (label, predictor),
                'options': ['COMPRESS=' + algorithm,
                            'PREDICTOR=%d' % predictor,
                            'TILED=YES'],
            })
    return tests


def applicable_tests(tests, is_float):
    """Return the cases that make sense for the input's data type.

    PREDICTOR=3 cases are dropped when the input is not floating point.
    A new list is built instead of removing entries from ``tests`` while
    iterating over it, which would skip the element after each removal.
    """
    if is_float:
        return list(tests)
    return [test for test in tests if 'PREDICTOR=3' not in test['options']]


def translate_command(options, src, dst):
    """Return the gdal_translate argument list converting ``src`` to ``dst``.

    The command is an argument list rather than a shell string, so paths that
    contain spaces or shell metacharacters are passed through unchanged.
    """
    command = ['gdal_translate', '-of', 'GTiff']
    for option in options:
        command += ['-co', option]
    return command + [src, dst]


def run_translate(options, src, dst):
    """Run gdal_translate and raise CalledProcessError if it fails."""
    import subprocess  # local import: not part of the file's import block

    subprocess.check_call(translate_command(options, src, dst))


def main(argv):
    """Benchmark the GeoTiff compressions on the file named in ``argv[1]``.

    Writes an uncompressed copy and one copy per benchmark case into a
    ``tmp`` directory next to the input, times writing and reading each copy,
    prints the results as a table and removes the temporary files again.

    Raises:
        Exception: if no input file is given or GDAL cannot open it.
        subprocess.CalledProcessError: if a gdal_translate call fails.
    """
    if len(argv) < 2:  # check if an input file is given
        raise Exception("Missing input GeoTiff.")

    src_img = os.path.abspath(argv[1])

    dataset = gdal.Open(src_img)
    if dataset is None:
        raise Exception("Could not open input GeoTiff: " + src_img)
    data_type = gdal.GetDataTypeName(dataset.GetRasterBand(1).DataType)
    dataset = None  # drop the reference so GDAL closes the file

    tests = build_tests()
    if 'Float' not in data_type:
        print("## Input is not floating point, so skip PREDICTOR=3 ##")
    tests = applicable_tests(tests, 'Float' in data_type)

    # create tmp dir
    tmp_dir = os.path.join(os.path.dirname(src_img), "tmp")
    if not os.path.exists(tmp_dir):
        os.makedirs(tmp_dir)

    # generate filenames
    uncompressed_input = os.path.join(tmp_dir, "input.tif")
    for test in tests:
        test['file'] = os.path.join(tmp_dir, test['name'] + '.tif')

    try:
        # make sure to have uncompressed input
        print("## Make sure input is uncompressed ##")
        run_translate([], src_img, uncompressed_input)

        # execute and time compression
        print("## Execute and time compressions ##")
        for test in tests:
            start_time = time.time()
            run_translate(test['options'], uncompressed_input, test['file'])
            test['write'] = time.time() - start_time

        # check filesizes
        for test in tests:
            test['size'] = size(os.path.getsize(test['file']), system=si)

        # check read times
        print("## Execute and time decompressions ##")
        for test in tests:
            start_time = time.time()
            img = gdal.Open(test['file']).ReadAsArray()
            img = None  # release the array before the next read
            test['read'] = time.time() - start_time

        # print results
        names = [test['title'] for test in tests]
        sizes = [test['size'] for test in tests]
        writes = [test['write'] for test in tests]
        reads = [test['read'] for test in tests]
        print("## Benchmark results ##")
        print(pandas.DataFrame([sizes, writes, reads],
                               index=["Size", "Write time", "Read time"],
                               columns=names))
    finally:
        # remove the temporary files even when a step above failed
        for path in [uncompressed_input] + [test['file'] for test in tests]:
            if os.path.exists(path):
                os.remove(path)
        # only remove tmp itself, and only when nothing else lives in it
        if os.path.isdir(tmp_dir) and not os.listdir(tmp_dir):
            os.rmdir(tmp_dir)


if __name__ == "__main__":
    main(sys.argv)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment