Last active
January 30, 2026 18:12
-
-
Save phargogh/879173cc7ec013b68c209677262ef732 to your computer and use it in GitHub Desktop.
Small Flask app that dynamically tars several remote files together and streams the archive back as the response to a POST request
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import datetime
import os
import tarfile
from email.utils import parsedate_to_datetime

import requests
from flask import Flask
from flask import jsonify
from flask import request
from flask import Response
from flask import stream_with_context
app = Flask(__name__)

# Seconds to wait for an upstream host to connect / send data.  Requests waits
# forever by default, which would let one stalled host pin a worker.
_UPSTREAM_TIMEOUT = 30

# Ask upstream for the stored bytes as-is so that the body length matches the
# Content-Length that gets written into the tar header.
_UPSTREAM_HEADERS = {'Accept-Encoding': 'identity'}

# Upstream read size; an arbitrary multiple of the 512-byte tar block.
_CHUNK_SIZE = 128 * tarfile.BLOCKSIZE

# A tar archive is terminated by two all-zero blocks.
_END_OF_ARCHIVE = tarfile.NUL * (2 * tarfile.BLOCKSIZE)


def _padding_for(size):
    """Return the number of NUL bytes needed to pad ``size`` to a tar block."""
    return -size % tarfile.BLOCKSIZE


def _build_member_header(url):
    """Issue a HEAD request for ``url`` and describe it as a tar member.

    Args:
        url: Address of the remote file.

    Returns:
        A ``tarfile.TarInfo`` whose name is the URL's basename, whose size is
        the reported ``Content-Length`` and whose mtime is the reported
        ``Last-Modified`` (left at the ``TarInfo`` default when that header
        is absent or cannot be parsed).

    Raises:
        requests.RequestException: If the HEAD request fails or returns an
            error status.
        ValueError: If no integer ``Content-Length`` is reported; a tar
            header cannot be written without the member size.
    """
    # requests.head() does not follow redirects unless asked to, so without
    # allow_redirects the size/mtime could describe a 3xx response instead of
    # the file itself.
    head_resp = requests.head(
        url, allow_redirects=True, headers=_UPSTREAM_HEADERS,
        timeout=_UPSTREAM_TIMEOUT)
    head_resp.raise_for_status()

    try:
        size = int(head_resp.headers['Content-Length'])
    except (KeyError, ValueError) as error:
        raise ValueError(
            f'{url} did not report a usable Content-Length') from error

    tar_info = tarfile.TarInfo(os.path.basename(url))
    tar_info.size = size

    # parsedate_to_datetime is locale-independent and keeps the GMT offset,
    # unlike strptime(...).timestamp(), which reads the value as local time.
    try:
        modified = parsedate_to_datetime(head_resp.headers['Last-Modified'])
    except (KeyError, TypeError, ValueError):
        modified = None
    if modified is not None:
        if modified.tzinfo is None:
            # HTTP dates are expressed in GMT even when no offset was parsed.
            modified = modified.replace(tzinfo=datetime.timezone.utc)
        # An integer mtime fits in the plain header field.
        tar_info.mtime = int(modified.timestamp())
    return tar_info


@app.route('/bundle-tar/', methods=['POST'])
def stream_tar_file():
    """Stream a tar archive assembled on the fly from a list of remote files.

    The POST body must be a JSON object of the form ``{"urls": [<url>, ...]}``.
    Every URL is inspected with a HEAD request before the response starts, so
    that the tar headers and the total ``Content-Length`` are known up front
    and upstream problems are reported with a real error status rather than
    as a truncated ``200`` response.

    Returns:
        * ``400`` with a JSON ``error`` message when the body is not JSON or
          ``urls`` is not a list of strings.
        * ``502`` with a JSON ``error`` message when a URL cannot be
          inspected.
        * Otherwise a streamed ``application/x-tar`` response.
    """
    # silent=True turns a missing/invalid JSON body into None so the message
    # below is what the client actually receives.
    req_data = request.get_json(silent=True)
    if req_data is None:
        return jsonify({'error': 'JSON required in POST request'}), 400

    files_to_stream = (
        req_data.get('urls') if isinstance(req_data, dict) else None)
    if (not isinstance(files_to_stream, list)
            or not all(isinstance(url, str) for url in files_to_stream)):
        return jsonify({
            'error': ('"urls" key must be a list of URLs, not '
                      f'{files_to_stream!r}')}), 400

    # NOTE(security): the URLs are caller-supplied and fetched from this
    # server's network position (SSRF risk).  Restrict the allowed hosts
    # before exposing this endpoint to untrusted clients.
    try:
        members = [
            (url, _build_member_header(url)) for url in files_to_stream]
    except (requests.RequestException, ValueError) as error:
        return jsonify(
            {'error': f'Could not inspect upstream file: {error}'}), 502

    headers = [(url, info.size, info.tobuf()) for url, info in members]
    total_size = len(_END_OF_ARCHIVE) + sum(
        len(header) + size + _padding_for(size)
        for _, size, header in headers)

    # TODO: support range requests for resumable downloads

    def _generate_tarfile():
        for url, size, header in headers:
            print(f'processing file {url}')
            yield header

            sent = 0
            with requests.get(url, stream=True, headers=_UPSTREAM_HEADERS,
                              timeout=_UPSTREAM_TIMEOUT) as r:
                r.raise_for_status()
                # Chunks may be shorter than requested at any point in the
                # stream, so a short chunk says nothing about end-of-file.
                for chunk in r.iter_content(chunk_size=_CHUNK_SIZE):
                    sent += len(chunk)
                    if sent > size:
                        break
                    # chunk is already a bytestring
                    yield chunk

            if sent != size:
                # The header already promised `size` bytes; anything else
                # would corrupt every following member, so abort the stream.
                raise RuntimeError(
                    f'{url} sent {sent} bytes but announced {size}')

            # Members are padded with NULs to a whole number of tar blocks.
            padding = _padding_for(size)
            if padding:
                yield tarfile.NUL * padding

        yield _END_OF_ARCHIVE

    return Response(stream_with_context(_generate_tarfile()),
                    mimetype='application/x-tar',
                    headers={'Content-Length': str(total_size)})
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Development helpers for the Flask app in app.py.
.PHONY: run

# Create the virtualenv directory (named after this target) and install the
# packages listed in requirements.txt into it.
env:
	python -m venv $@
	./$@/bin/pip install -r requirements.txt

# Start the Flask development server.  Depends on `env` so that a fresh
# checkout can go straight to `make run`.
run: env
	FLASK_APP=app.py ./env/bin/python -m flask run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| flask | |
| requests |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import requests | |
# Sample inputs for the bundling endpoint: a zip archive and the same URL
# with a ``.yml`` suffix appended.
_LAYERS_ZIP_URL = (
    'https://storage.googleapis.com/natcap-data-cache/collaborator-data/'
    'CNA-neugarten-chaplin-kramer/cna-ncp-optimized_layers.zip')
file_a = f'{_LAYERS_ZIP_URL}.yml'
file_b = _LAYERS_ZIP_URL
# Streaming-download pattern adapted from https://stackoverflow.com/a/16696317
def download_file(url, local_filename, files):
    """POST ``files`` as JSON to ``url`` and save the streamed reply to disk.

    Args:
        url: Endpoint that receives the POST request.
        local_filename: Path the response body is written to (binary mode).
        files: JSON-serializable payload sent as the request body.

    Returns:
        ``local_filename``, unchanged.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    # stream=True defers fetching the body until it is iterated below, so the
    # download is never held in memory as a whole.
    with requests.post(url, json=files, stream=True) as response:
        response.raise_for_status()
        with open(local_filename, 'wb') as out_file:
            # writelines() drains the iterator, writing one 8 KiB chunk at a
            # time.
            out_file.writelines(response.iter_content(chunk_size=8192))
    return local_filename
if __name__ == '__main__':
    # Ask the bundling service on localhost to tar both sample files and save
    # the streamed archive as bigtarfile.tar in the working directory.
    bundle_request = {'urls': [file_a, file_b]}
    download_file(
        'http://127.0.0.1:5000/bundle-tar', 'bigtarfile.tar', bundle_request)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment