Skip to content

Instantly share code, notes, and snippets.

@robkorv
Created May 19, 2021 10:54
Show Gist options
  • Select an option

  • Save robkorv/9de240b22a238ae57d3dfd55ebbf2f94 to your computer and use it in GitHub Desktop.

Select an option

Save robkorv/9de240b22a238ae57d3dfd55ebbf2f94 to your computer and use it in GitHub Desktop.
Download clips from your Twitch channel's clips-manager page.
#!/usr/bin/env python3
"""Download clips from a saved Twitch clips-manager HTML page.

Workflow: save the Twitch clips-manager page as ``clips.html``, point
``output_dir`` at an existing writable directory, then run this script.
Each clip row is parsed for its title, creation time and download URL,
and the clip is streamed to a slugified, sequence-numbered local file.
"""
# https://docs.python.org/3.6/library/os.html
import os
# https://docs.python.org/3.6/library/urllib.parse.html#url-parsing
from urllib.parse import unquote, urlparse
# https://docs.python-requests.org/en/master/
import requests
# https://github.com/voronind/awesome-slugify
import slugify
# https://parsel.readthedocs.io/en/latest/index.html
from parsel import Selector

input_html = "clips.html"
output_dir = "/where/ever/you/want/them"

# Read the saved clips-manager page.
with open(input_html) as f:
    html_content = f.read()

# Parse the HTML with the parsel Selector.
# https://parsel.readthedocs.io/en/latest/parsel.html#parsel.selector.Selector
selector = Selector(html_content)

# Select clip rows with CSS selectors; enumerate from 1 so file names
# carry a stable, sortable sequence number.
# https://parsel.readthedocs.io/en/latest/usage.html
# https://docs.python.org/3.6/library/functions.html#enumerate
for i, clip_row in enumerate(
    selector.css('[data-a-target="clips-manager-table-row-container"]'), 1
):
    clip_title = clip_row.css("h5::text").get().strip()
    clip_created = (
        clip_row.css('[data-a-target="clips-manager-row-time-created"]::text')
        .get()
        .strip()
    )
    clip_url = clip_row.css("a[download]").attrib["href"]
    # The URL path (minus the leading "/") is the original clip file name.
    parsed_clip_url = urlparse(clip_url)
    clip_file = unquote(parsed_clip_url.path[1:])
    # Combine sequence number, title, timestamp and original name into a
    # filesystem-safe name, e.g. "0001-my-title-may-19-...-clip.mp4".
    file_name = slugify.slugify_filename(
        f"{str(i).rjust(4, '0')}-{clip_title}-{clip_created}-{clip_file}"
    )
    # https://docs.python.org/3.6/library/os.path.html#os.path.join
    output_file = os.path.join(output_dir, file_name)
    print(f"downloading {clip_file} to {output_file}")
    # stream=True so large clips are fetched chunk-by-chunk instead of
    # being buffered fully in memory before the first write.
    # https://docs.python-requests.org/en/master/user/quickstart/#raw-response-content
    with requests.get(clip_url, stream=True, timeout=30) as response:
        response.raise_for_status()
        with open(output_file, "wb") as out:
            for chunk in response.iter_content(chunk_size=8192):
                out.write(chunk)
@robkorv
Copy link
Author

robkorv commented May 19, 2021

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment