phargogh · January 30, 2026 18:12
diff --git a/app.py b/app.py
 import datetime
 import os
 import tarfile

 import requests
 from flask import Flask
 from flask import jsonify
 from flask import request
 from flask import Response
 from flask import stream_with_context

 app = Flask(__name__)

 @app.route('/bundle-tar/', methods=['POST'])
 def stream_tar_file():
    """Stream an uncompressed tar archive built on the fly from remote files.

    The POST body must be JSON of the form ``{"urls": [<url>, ...]}``.  For
    each URL a ``HEAD`` request supplies the member size and modification
    time, a tar header is emitted, and the body is relayed from a streaming
    ``GET`` so that no file is ever held in memory in full.

    Returns:
        A streaming ``application/x-tar`` response on success, or a JSON
        ``{"error": ...}`` body with HTTP status 400 when the request is
        not JSON or ``"urls"`` is not a list of strings.

    Raises:
        requests.HTTPError: from inside the stream when a remote file
            answers with an error status.
        KeyError: from inside the stream when a remote file does not
            report ``Content-Length`` (the tar header needs the size up
            front).
        OSError: from inside the stream when the number of bytes relayed
            differs from the size already written to the tar header.
    """
    # Function-scope imports keep this block self-contained (both stdlib).
    import email.utils
    import urllib.parse

    # silent=True makes a missing or malformed JSON body come back as None,
    # so the error below is what the client sees.
    req_data = request.get_json(silent=True)
    if req_data is None:
        return jsonify({'error': 'JSON required in POST request'}), 400

    # Look "urls" up without raising: the previous error message formatted
    # a variable that was still unbound whenever the lookup itself failed.
    files_to_stream = (
        req_data.get('urls') if isinstance(req_data, dict) else None)
    if (not isinstance(files_to_stream, list)
            or not all(isinstance(url, str) for url in files_to_stream)):
        return jsonify({
            'error': ('"urls" key must be a list of URLs, not '
                      f'{files_to_stream!r}')}), 400

    # NOTE(security): the URLs come straight from the client and are fetched
    # by this server.  Restrict them (e.g. to an allow-list of hosts) before
    # exposing this endpoint to untrusted callers.

    # TODO: communicate the overall tarfile size as a part of the request.
    # TODO: support range requests for resumable downloads

    # The chunk size no longer has to equal the tar block size, because
    # padding is derived from the member size, not from the last chunk.
    chunk_size = 128 * tarfile.BLOCKSIZE
    fetch_options = {
        # Ask for unencoded bytes so the size reported by HEAD matches what
        # iter_content() yields (it decodes gzip/deflate transparently).
        'headers': {'Accept-Encoding': 'identity'},
        # requests never times out unless told to.
        'timeout': 30,
    }

    def _parse_mtime(last_modified):
        """Convert an HTTP date header to an int POSIX time (0 if unusable)."""
        if not last_modified:
            return 0
        try:
            parsed = email.utils.parsedate_to_datetime(last_modified)
        except (TypeError, ValueError):
            return 0
        if parsed.tzinfo is None:
            # HTTP dates are GMT; do not let the server's local zone leak in.
            parsed = parsed.replace(tzinfo=datetime.timezone.utc)
        # An int keeps the member header a plain 512-byte block; tarfile
        # adds an extended header for float timestamps.
        return int(parsed.timestamp())

    def _generate_tarfile():
        for url in files_to_stream:
            print(f'processing file {url}')

            # requests.head() does not follow redirects by default; follow
            # them so size and date describe the file actually fetched.
            head_resp = requests.head(
                url, allow_redirects=True, **fetch_options)
            head_resp.raise_for_status()

            # Name the member after the URL path so that a query string
            # does not end up in the file name.
            url_path = urllib.parse.urlsplit(url).path
            member_name = os.path.basename(url_path) or os.path.basename(url)
            tar_info = tarfile.TarInfo(member_name)
            tar_info.size = int(head_resp.headers['Content-Length'])
            tar_info.mtime = _parse_mtime(
                head_resp.headers.get('Last-Modified'))

            yield tar_info.tobuf()

            bytes_sent = 0
            with requests.get(url, stream=True, **fetch_options) as r:
                r.raise_for_status()
                for chunk in r.iter_content(chunk_size=chunk_size):
                    bytes_sent += len(chunk)
                    yield chunk

            if bytes_sent != tar_info.size:
                # The header already promised tar_info.size bytes; abort
                # rather than hand the client a silently corrupt archive.
                raise OSError(
                    f'{url}: expected {tar_info.size} bytes, '
                    f'received {bytes_sent}')

            # Member data is padded with NULs to a whole number of blocks.
            remainder = tar_info.size % tarfile.BLOCKSIZE
            if remainder:
                yield tarfile.NUL * (tarfile.BLOCKSIZE - remainder)

        # Two zero-filled blocks mark the end of the archive.
        yield tarfile.NUL * (2 * tarfile.BLOCKSIZE)

    return Response(stream_with_context(_generate_tarfile()),
                    mimetype='application/x-tar')
diff --git a/Makefile b/Makefile
 # `run` names a command rather than a file, so it is declared phony.
 .PHONY: run

 # Create the virtualenv in the directory named by the target ($@ expands
 # to `env`) and install the packages listed in requirements.txt into it.
 env:
 	python -m venv $@
 	./env/bin/pip install -r requirements.txt

 # Start Flask's development server for app.py with the virtualenv's Python.
 # NOTE(review): this target does not depend on `env`; run `make env` first.
 run:
 	FLASK_APP=app.py ./env/bin/python -m flask run
diff --git a/requirements.txt b/requirements.txt
 flask
 requests
diff --git a/test.py b/test.py
 import requests

 # Sample inputs for the bundling endpoint: a `.zip.yml` file and the `.zip`
 # file that shares its stem, both under the same bucket prefix.
 file_a = (
    'https://storage.googleapis.com/natcap-data-cache/collaborator-data/'
    'CNA-neugarten-chaplin-kramer/cna-ncp-optimized_layers.zip.yml')
 file_b = (
    'https://storage.googleapis.com/natcap-data-cache/collaborator-data/'
    'CNA-neugarten-chaplin-kramer/cna-ncp-optimized_layers.zip')

 # https://stackoverflow.com/a/16696317
 def download_file(url, local_filename, files):
    """POST ``files`` as JSON to ``url`` and save the streamed reply to disk.

    Args:
        url: Endpoint that receives the POST request.
        local_filename: Path the response body is written to, in binary mode.
        files: JSON-serializable payload sent as the request body.

    Returns:
        ``local_filename``, unchanged.

    Raises:
        requests.HTTPError: via ``raise_for_status`` when the reply carries
            an error status.
    """
    # stream=True defers reading the body; it is consumed piecewise below.
    with requests.post(url, json=files, stream=True) as response:
        # Check the status first so that no output file is created on error.
        response.raise_for_status()
        with open(local_filename, 'wb') as out_file:
            out_file.writelines(response.iter_content(chunk_size=8192))
    return local_filename

 if __name__ == '__main__':
    # Ask the service on 127.0.0.1:5000 to bundle both sample files and
    # store the reply as bigtarfile.tar in the working directory.
    download_file(
        url='http://127.0.0.1:5000/bundle-tar',
        local_filename='bigtarfile.tar',
        files={'urls': [file_a, file_b]},
    )
	import datetime
	import os
	import tarfile

	import requests
	from flask import Flask
	from flask import jsonify
	from flask import request
	from flask import Response
	from flask import stream_with_context

	app = Flask(__name__)

	@app.route('/bundle-tar/', methods=['POST'])
	def stream_tar_file():
	    """Stream an uncompressed tar archive built on the fly from remote files.

	    The POST body must be JSON of the form ``{"urls": [<url>, ...]}``.  For
	    each URL a ``HEAD`` request supplies the member size and modification
	    time, a tar header is emitted, and the body is relayed from a streaming
	    ``GET`` so that no file is ever held in memory in full.

	    Returns:
	        A streaming ``application/x-tar`` response on success, or a JSON
	        ``{"error": ...}`` body with HTTP status 400 when the request is
	        not JSON or ``"urls"`` is not a list of strings.

	    Raises:
	        requests.HTTPError: from inside the stream when a remote file
	            answers with an error status.
	        KeyError: from inside the stream when a remote file does not
	            report ``Content-Length`` (the tar header needs the size up
	            front).
	        OSError: from inside the stream when the number of bytes relayed
	            differs from the size already written to the tar header.
	    """
	    # Function-scope imports keep this block self-contained (both stdlib).
	    import email.utils
	    import urllib.parse

	    # silent=True makes a missing or malformed JSON body come back as None,
	    # so the error below is what the client sees.
	    req_data = request.get_json(silent=True)
	    if req_data is None:
	        return jsonify({'error': 'JSON required in POST request'}), 400

	    # Look "urls" up without raising: the previous error message formatted
	    # a variable that was still unbound whenever the lookup itself failed.
	    files_to_stream = (
	        req_data.get('urls') if isinstance(req_data, dict) else None)
	    if (not isinstance(files_to_stream, list)
	            or not all(isinstance(url, str) for url in files_to_stream)):
	        return jsonify({
	            'error': ('"urls" key must be a list of URLs, not '
	                      f'{files_to_stream!r}')}), 400

	    # NOTE(security): the URLs come straight from the client and are fetched
	    # by this server.  Restrict them (e.g. to an allow-list of hosts) before
	    # exposing this endpoint to untrusted callers.

	    # TODO: communicate the overall tarfile size as a part of the request.
	    # TODO: support range requests for resumable downloads

	    # The chunk size no longer has to equal the tar block size, because
	    # padding is derived from the member size, not from the last chunk.
	    chunk_size = 128 * tarfile.BLOCKSIZE
	    fetch_options = {
	        # Ask for unencoded bytes so the size reported by HEAD matches what
	        # iter_content() yields (it decodes gzip/deflate transparently).
	        'headers': {'Accept-Encoding': 'identity'},
	        # requests never times out unless told to.
	        'timeout': 30,
	    }

	    def _parse_mtime(last_modified):
	        """Convert an HTTP date header to an int POSIX time (0 if unusable)."""
	        if not last_modified:
	            return 0
	        try:
	            parsed = email.utils.parsedate_to_datetime(last_modified)
	        except (TypeError, ValueError):
	            return 0
	        if parsed.tzinfo is None:
	            # HTTP dates are GMT; do not let the server's local zone leak in.
	            parsed = parsed.replace(tzinfo=datetime.timezone.utc)
	        # An int keeps the member header a plain 512-byte block; tarfile
	        # adds an extended header for float timestamps.
	        return int(parsed.timestamp())

	    def _generate_tarfile():
	        for url in files_to_stream:
	            print(f'processing file {url}')

	            # requests.head() does not follow redirects by default; follow
	            # them so size and date describe the file actually fetched.
	            head_resp = requests.head(
	                url, allow_redirects=True, **fetch_options)
	            head_resp.raise_for_status()

	            # Name the member after the URL path so that a query string
	            # does not end up in the file name.
	            url_path = urllib.parse.urlsplit(url).path
	            member_name = os.path.basename(url_path) or os.path.basename(url)
	            tar_info = tarfile.TarInfo(member_name)
	            tar_info.size = int(head_resp.headers['Content-Length'])
	            tar_info.mtime = _parse_mtime(
	                head_resp.headers.get('Last-Modified'))

	            yield tar_info.tobuf()

	            bytes_sent = 0
	            with requests.get(url, stream=True, **fetch_options) as r:
	                r.raise_for_status()
	                for chunk in r.iter_content(chunk_size=chunk_size):
	                    bytes_sent += len(chunk)
	                    yield chunk

	            if bytes_sent != tar_info.size:
	                # The header already promised tar_info.size bytes; abort
	                # rather than hand the client a silently corrupt archive.
	                raise OSError(
	                    f'{url}: expected {tar_info.size} bytes, '
	                    f'received {bytes_sent}')

	            # Member data is padded with NULs to a whole number of blocks.
	            remainder = tar_info.size % tarfile.BLOCKSIZE
	            if remainder:
	                yield tarfile.NUL * (tarfile.BLOCKSIZE - remainder)

	        # Two zero-filled blocks mark the end of the archive.
	        yield tarfile.NUL * (2 * tarfile.BLOCKSIZE)

	    return Response(stream_with_context(_generate_tarfile()),
	                    mimetype='application/x-tar')
	# NOTE(review): in this copy every line carries the same leading tab, so
	# target lines and recipe lines are no longer distinguishable by indent.
	# `run` names a command rather than a file, so it is declared phony.
	.PHONY: run

	# Create the virtualenv in the directory named by the target ($@ expands
	# to `env`) and install the packages listed in requirements.txt into it.
	env:
	python -m venv $@
	./env/bin/pip install -r requirements.txt

	# Start Flask's development server for app.py with the virtualenv's Python.
	run:
	FLASK_APP=app.py ./env/bin/python -m flask run
	import requests

	# Sample inputs for the bundling endpoint: a `.zip.yml` file and the `.zip`
	# file that shares its stem, both under the same bucket prefix.
	file_a = (
	    'https://storage.googleapis.com/natcap-data-cache/collaborator-data/'
	    'CNA-neugarten-chaplin-kramer/cna-ncp-optimized_layers.zip.yml')
	file_b = (
	    'https://storage.googleapis.com/natcap-data-cache/collaborator-data/'
	    'CNA-neugarten-chaplin-kramer/cna-ncp-optimized_layers.zip')

	# https://stackoverflow.com/a/16696317
	def download_file(url, local_filename, files):
	    """POST ``files`` as JSON to ``url`` and save the streamed reply to disk.

	    Args:
	        url: Endpoint that receives the POST request.
	        local_filename: Path the response body is written to, in binary mode.
	        files: JSON-serializable payload sent as the request body.

	    Returns:
	        ``local_filename``, unchanged.

	    Raises:
	        requests.HTTPError: via ``raise_for_status`` when the reply carries
	            an error status.
	    """
	    # stream=True defers reading the body; it is consumed piecewise below.
	    with requests.post(url, stream=True, json=files) as r:
	        # Check the status first so that no output file is created on error.
	        r.raise_for_status()
	        with open(local_filename, 'wb') as f:
	            for chunk in r.iter_content(chunk_size=8192):
	                f.write(chunk)
	    return local_filename

	if __name__ == '__main__':
	    # Ask the service on 127.0.0.1:5000 to bundle both sample files and
	    # store the reply as bigtarfile.tar in the working directory.
	    download_file('http://127.0.0.1:5000/bundle-tar', 'bigtarfile.tar',
	                  {'urls': [file_a, file_b]})