Skip to content

Instantly share code, notes, and snippets.

@robkorv
Created May 19, 2021 10:54
Show Gist options
  • Select an option

  • Save robkorv/9de240b22a238ae57d3dfd55ebbf2f94 to your computer and use it in GitHub Desktop.

Select an option

Save robkorv/9de240b22a238ae57d3dfd55ebbf2f94 to your computer and use it in GitHub Desktop.
Download clips from your Twitch channel's clips-manager page.
#!/usr/bin/env python3
"""Download clips from a saved Twitch clips-manager HTML page.

Workflow: save the Twitch clips-manager page as ``clips.html``, point
``output_dir`` at an existing writable directory, then run this script.
Each clip row is parsed for its title, creation time and download URL,
and the clip is streamed to a slugified, sequence-numbered local file.
"""
# https://docs.python.org/3.6/library/os.html
import os
# https://docs.python.org/3.6/library/urllib.parse.html#url-parsing
from urllib.parse import unquote, urlparse
# https://docs.python-requests.org/en/master/
import requests
# https://github.com/voronind/awesome-slugify
import slugify
# https://parsel.readthedocs.io/en/latest/index.html
from parsel import Selector

input_html = "clips.html"
output_dir = "/where/ever/you/want/them"

# Read the saved clips-manager page.
with open(input_html) as f:
    html_content = f.read()

# Parse the HTML with the parsel Selector.
# https://parsel.readthedocs.io/en/latest/parsel.html#parsel.selector.Selector
selector = Selector(html_content)

# Select clip rows with CSS selectors; enumerate from 1 so file names
# carry a stable, sortable sequence number.
# https://parsel.readthedocs.io/en/latest/usage.html
# https://docs.python.org/3.6/library/functions.html#enumerate
for i, clip_row in enumerate(
    selector.css('[data-a-target="clips-manager-table-row-container"]'), 1
):
    clip_title = clip_row.css("h5::text").get().strip()
    clip_created = (
        clip_row.css('[data-a-target="clips-manager-row-time-created"]::text')
        .get()
        .strip()
    )
    clip_url = clip_row.css("a[download]").attrib["href"]
    # The URL path (minus the leading "/") is the original clip file name.
    parsed_clip_url = urlparse(clip_url)
    clip_file = unquote(parsed_clip_url.path[1:])
    # Combine sequence number, title, timestamp and original name into a
    # filesystem-safe name, e.g. "0001-my-title-may-19-...-clip.mp4".
    file_name = slugify.slugify_filename(
        f"{str(i).rjust(4, '0')}-{clip_title}-{clip_created}-{clip_file}"
    )
    # https://docs.python.org/3.6/library/os.path.html#os.path.join
    output_file = os.path.join(output_dir, file_name)
    print(f"downloading {clip_file} to {output_file}")
    # stream=True so large clips are fetched chunk-by-chunk instead of
    # being buffered fully in memory before the first write.
    # https://docs.python-requests.org/en/master/user/quickstart/#raw-response-content
    with requests.get(clip_url, stream=True, timeout=30) as response:
        response.raise_for_status()
        with open(output_file, "wb") as out:
            for chunk in response.iter_content(chunk_size=8192):
                out.write(chunk)
@robkorv
Copy link
Author

robkorv commented May 19, 2021

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment