Skip to content

Instantly share code, notes, and snippets.

@phargogh
Last active January 30, 2026 18:12
Show Gist options
  • Select an option

  • Save phargogh/879173cc7ec013b68c209677262ef732 to your computer and use it in GitHub Desktop.

Select an option

Save phargogh/879173cc7ec013b68c209677262ef732 to your computer and use it in GitHub Desktop.
Small Flask app that dynamically tars several remote files together and streams the archive back as the response to a POST request
import datetime
import os
import tarfile
import requests
from flask import Flask
from flask import jsonify
from flask import request
from flask import Response
from flask import stream_with_context
# Flask application object; the route handler below registers itself on it.
app = Flask(__name__)
@app.route('/bundle-tar/', methods=['POST'])
def stream_tar_file():
    """Stream a tar archive assembled on the fly from a list of remote files.

    The POST body must be a JSON object of the form
    ``{"urls": ["https://...", ...]}``.  Every URL is fetched in turn and
    stored in the archive under the basename of its URL.  Nothing is written
    to disk: tar headers and file contents are yielded straight into the
    response as they become available.

    Returns:
        A streaming ``application/x-tar`` response on success, or a JSON
        ``{"error": ...}`` body with HTTP status 400 when the request payload
        is missing or malformed.
    """
    # Imported here so this handler does not depend on a file-level import.
    from email.utils import parsedate_to_datetime

    req_data = request.get_json()
    if req_data is None:
        return jsonify({'error': 'JSON required in POST request'}), 400

    # Validate without try/except so the error message never references a
    # name that was not bound (a missing "urls" key used to crash the
    # handler while it was building the error response).
    urls = req_data.get('urls') if isinstance(req_data, dict) else None
    if (not isinstance(urls, list)
            or not all(isinstance(url, str) for url in urls)):
        return jsonify({
            'error': ('"urls" key must be a list of URLs, not '
                      f'{urls!r}')}), 400

    # Upstream bodies are relayed in chunks that are a whole number of tar
    # blocks; padding is computed from the declared size, not chunk length.
    chunk_size = tarfile.BLOCKSIZE * 128

    # TODO: communicate the overall tarfile size as a part of the request.
    # TODO: support range requests for resumable downloads
    # NOTE(review): the server fetches arbitrary caller-supplied URLs (SSRF
    # risk) and sets no request timeouts; restrict hosts / add timeouts
    # before exposing this endpoint beyond trusted clients.
    def _generate_tarfile():
        """Yield the archive: header, data and padding for each URL."""
        for url in urls:
            print(f'processing file {url}')
            # requests.head() does not follow redirects by default while
            # requests.get() does; follow them so the metadata describes the
            # same resource whose body is streamed below.
            head_resp = requests.head(url, allow_redirects=True)
            head_resp.raise_for_status()

            tar_info = tarfile.TarInfo(os.path.basename(url))
            # NOTE(review): assumes the body is served without a
            # Content-Encoding, so Content-Length equals the bytes streamed.
            tar_info.size = int(head_resp.headers['Content-Length'])

            # HTTP dates are expressed in GMT.  Parse them with the RFC-aware
            # parser (strptime's %X/%Z are locale dependent and produce a
            # naive datetime that .timestamp() would treat as local time).
            last_modified = head_resp.headers.get('Last-Modified')
            if last_modified is not None:
                try:
                    modified = parsedate_to_datetime(last_modified)
                except (TypeError, ValueError):
                    modified = None  # unparsable date: keep default mtime
                if modified is not None:
                    if modified.tzinfo is None:
                        modified = modified.replace(
                            tzinfo=datetime.timezone.utc)
                    # An integer mtime fits the plain header field; a float
                    # would force an extra pax extended header.
                    tar_info.mtime = int(modified.timestamp())

            yield tar_info.tobuf()

            with requests.get(url, stream=True) as r:
                r.raise_for_status()
                # Chunks are already bytestrings; relay every one of them.
                # A short chunk does not necessarily mean end-of-stream, so
                # it must not be used to decide when to stop or pad.
                for chunk in r.iter_content(chunk_size=chunk_size):
                    yield chunk

            # Member data must occupy a whole number of 512-byte blocks.
            remainder = tar_info.size % tarfile.BLOCKSIZE
            if remainder:
                yield tarfile.NUL * (tarfile.BLOCKSIZE - remainder)

        # End-of-archive marker: two consecutive zero-filled blocks.
        yield tarfile.NUL * (tarfile.BLOCKSIZE * 2)

    return Response(stream_with_context(_generate_tarfile()),
                    mimetype='application/x-tar')
# Development helpers for the tar-bundling Flask app.
.PHONY: run

# Create the virtualenv in ./env and install the pinned requirements into it.
env:
	python -m venv $@
	./env/bin/pip install -r requirements.txt

# Start the Flask development server.  Depends on `env` so that a fresh
# checkout builds the virtualenv first instead of failing on a missing
# ./env/bin/python.
run: env
	FLASK_APP=app.py ./env/bin/python -m flask run
import requests
# Two sample files served from the same storage.googleapis.com prefix;
# file_a is file_b's URL with a '.yml' suffix appended.
_SAMPLE_DATA_PREFIX = (
    'https://storage.googleapis.com/natcap-data-cache/collaborator-data/'
    'CNA-neugarten-chaplin-kramer/')
file_b = _SAMPLE_DATA_PREFIX + 'cna-ncp-optimized_layers.zip'
file_a = file_b + '.yml'
def download_file(url, local_filename, files):
    """POST ``files`` as JSON to ``url`` and save the streamed reply to disk.

    Adapted from https://stackoverflow.com/a/16696317

    Args:
        url: Endpoint the request is POSTed to.
        local_filename: Path of the file the response body is written to.
        files: JSON-serialisable payload sent as the request body.

    Returns:
        ``local_filename``, unchanged.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    # stream=True defers fetching the body until it is iterated below.
    with requests.post(url, json=files, stream=True) as response:
        # Fail before the output file is created or truncated.
        response.raise_for_status()
        body_chunks = response.iter_content(chunk_size=8192)
        with open(local_filename, 'wb') as out_file:
            for piece in body_chunks:
                out_file.write(piece)
    return local_filename
if __name__ == '__main__':
    # The trailing slash matches the server's '/bundle-tar/' route exactly.
    # Without it the POST is answered with a redirect to the slash-terminated
    # URL, which costs a round trip and is not replayed as a POST by every
    # server/client combination.
    download_file('http://127.0.0.1:5000/bundle-tar/', 'bigtarfile.tar',
                  {'urls': [file_a, file_b]})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment