Last active
November 25, 2022 22:17
-
-
Save Telewa/a614a8d43882ade998c71501b44fa4c5 to your computer and use it in GitHub Desktop.
Download many files using wget and Python
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import asyncio
from itertools import islice
from typing import Any, Awaitable, Callable, List
def chunk(arr_range, arr_size):
    """
    Split an iterable into consecutive tuples of at most ``arr_size`` items.

    :param arr_range: Any iterable; it is consumed lazily as batches are requested
    :param arr_size: Maximum number of items per tuple (the last tuple may be shorter)
    :return: An iterator of tuples that stops at the first empty batch
    """
    source = iter(arr_range)

    def _batches():
        while True:
            piece = tuple(islice(source, arr_size))
            # An empty tuple means the source is exhausted (or arr_size is 0).
            if piece == ():
                return
            yield piece

    return _batches()
async def main(
    batch_size: int,
    items: List[Any],
    executor: Callable[[Any], Awaitable[Any]],
) -> None:
    """
    Run ``executor`` over ``items`` in consecutive batches.

    All items of one batch are started together and the whole batch is awaited
    before the next batch begins, so at most ``batch_size`` executor calls are
    in flight at any time. A progress line is printed for every batch.

    :param batch_size: How many items to be consumed per batch from the list of items
    :param items: List of items to be acted upon
    :param executor: An async callable that should act on each item in the list
    :return: None
    :raises ValueError: If ``batch_size`` is smaller than 1 while there are items to process
    """
    if not items:
        # Nothing to process; an empty list is valid whatever the batch size.
        return
    if batch_size < 1:
        # A size of 0 would silently skip every item and a negative size would
        # fail deep inside islice, so reject both up front.
        raise ValueError(f"batch_size must be at least 1, got {batch_size}")

    # chunk() already shortens the final batch, so no clamping to len(items) is needed.
    groups = list(chunk(items, batch_size))
    total = len(groups)
    for index, group in enumerate(groups, start=1):
        print(f"chunk number: {index}/{total}, {len(group)} items")
        # gather() propagates the first exception raised by any executor call.
        await asyncio.gather(*(executor(item) for item in group))
async def execute(path: str) -> None:
    """
    Download ``path`` with ``wget`` into the ``images`` directory.

    The URL is passed to wget as a single argument rather than being pasted
    into a shell command line, so characters such as ``&``, ``?``, ``;`` or
    spaces in the URL cannot truncate it or be interpreted by a shell.

    :param path: URL of the file to download
    :return: None; wget's exit status is not inspected
    """
    process = await asyncio.create_subprocess_exec(
        'wget',
        '-c',                  # resume a partially downloaded file
        '--tries=0',           # keep retrying without limit
        '--read-timeout=20',   # give up on a stalled read after 20 seconds
        path,
        '-P', 'images',        # save into the images directory
    )
    await process.wait()
# Number of speedometer images to fetch; the URLs are numbered 1..number_of_items.
number_of_items = 10

# One URL per needle position, in ascending order.
urls = list(
    map(
        lambda n: f"https://convertermaniacs.com/images/speedometer-mph/speedometer-showing-needle-at-{n}-mph.jpg",
        range(1, number_of_items + 1),
    )
)

# Download everything, five URLs per batch.
asyncio.run(main(items=urls, executor=execute, batch_size=5))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment