Skip to content

Instantly share code, notes, and snippets.

@Telewa
Last active November 25, 2022 22:17
Show Gist options
  • Select an option

  • Save Telewa/a614a8d43882ade998c71501b44fa4c5 to your computer and use it in GitHub Desktop.

Select an option

Save Telewa/a614a8d43882ade998c71501b44fa4c5 to your computer and use it in GitHub Desktop.
Download many files using wget and Python
import asyncio
from itertools import islice
from typing import Any, Awaitable, Callable, List
def chunk(arr_range, arr_size):
    """
    Lazily split an iterable into consecutive tuples of at most ``arr_size`` items.

    :param arr_range: Any iterable; it is consumed as the result is iterated
    :param arr_size: Maximum number of items per tuple. The last tuple may be
        shorter. A size of 0 produces no tuples at all.
    :return: An iterator of tuples, ending when the source is exhausted
    """
    source = iter(arr_range)

    def take_next():
        # Pull the next slice of the shared iterator; an empty tuple means
        # there is nothing left to read.
        return tuple(islice(source, arr_size))

    # Two-argument iter(): keep calling take_next() until it returns the
    # sentinel (the empty tuple).
    return iter(take_next, ())
async def main(
    batch_size: int,
    items: List[Any],
    executor: Callable[[Any], Awaitable[Any]],
) -> None:
    """
    Run ``executor`` over ``items`` in consecutive batches.

    The items are split into groups of at most ``batch_size``. Inside a group,
    one ``executor(item)`` awaitable is created per item and all of them are
    awaited together with ``asyncio.gather``; the next group starts only once
    the current one has finished. A progress line is printed before each group.

    :param batch_size: How many items to be consumed per batch from the list of
        items; must be at least 1
    :param items: List of items to be acted upon
    :param executor: An async callable that should act on each item in the list
    :return: None
    :raises ValueError: If ``batch_size`` is smaller than 1
    """
    if batch_size < 1:
        # A size of 0 used to yield no batches at all, silently skipping every
        # item, and negative sizes failed deep inside ``islice``. Fail early
        # with a clear message instead.
        raise ValueError(f"batch_size must be at least 1, got {batch_size}")
    if not items:
        # Nothing to do; avoid building an empty batch list.
        return

    groups = list(chunk(items, batch_size))
    total = len(groups)
    for number, group in enumerate(groups, start=1):
        print(f"chunk number: {number}/{total}, {len(group)} items")
        # All awaitables of the batch are created first, then awaited together.
        await asyncio.gather(*(executor(item) for item in group))
async def execute(path: str) -> None:
    """
    Download ``path`` into the ``images`` directory using ``wget``.

    ``wget`` is started as a child process with ``-c``, ``--tries=0`` and
    ``--read-timeout=20``, and this coroutine waits until that process exits.
    The exit status is awaited but not inspected.

    :param path: URL to download; passed to ``wget`` as a single argument
    :return: None
    :raises OSError: If the ``wget`` executable cannot be started (for example
        ``FileNotFoundError`` when it is not installed)
    """
    # Hand the URL to wget as its own argv entry rather than interpolating it
    # into a shell command line: characters such as "&", "?", ";" or spaces in
    # a URL would otherwise be interpreted by the shell.
    process = await asyncio.create_subprocess_exec(
        "wget",
        "-c",
        "--tries=0",
        "--read-timeout=20",
        path,
        "-P",
        "images",
    )
    await process.wait()
# Number of speedometer images to fetch; image numbers run from 1 up to and
# including this value.
number_of_items = 10

# One download URL per image number.
urls = [
    f"https://convertermaniacs.com/images/speedometer-mph/speedometer-showing-needle-at-{n}-mph.jpg"
    for n in range(1, number_of_items + 1)
]

# Kick off the downloads in batches of five. There is no ``__main__`` guard,
# so this runs whenever the module is executed or imported.
asyncio.run(main(batch_size=5, items=urls, executor=execute))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment