"""Retrieve live chat content from YouTube videos."""

import csv
import json

import pytchat
import ray
|
# Initialize Ray so that fetch_live_chat (defined below) can run as
# parallel remote tasks.
ray.init()
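# Note: a bare ray.init() uses all available cores; passing num_cpus
# (e.g. ray.init(num_cpus=4)) would cap how many tasks run concurrently.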
|
|
|
urls = []

# AllScraped.csv holds one row per video: the first column is the
# YouTube video ID and the second column is the video title.
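# Illustrative layout (header names and titles here are made up; only the
# two-column shape matters):
#
#   video_id,title
#   XQhBHnPIsRk,Some stream title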
|
with open('AllScraped.csv', mode='r') as file_in:
    reader = csv.reader(file_in)
    next(reader)  # skip the header row
    for row in reader:
        urls.append(row[0])
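# An equivalent sketch using csv.DictReader, assuming the header's first
# column is literally named "video_id" (hypothetical; check the real file):
#
#   with open('AllScraped.csv') as f:
#       urls = [row['video_id'] for row in csv.DictReader(f)]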
|
|
|
# After the loop completes, `urls` holds every video ID; the list is used
# at the end of this script to retrieve the live chat for each video.
|
|
@ray.remote
def fetch_live_chat(video_id):
    """Fetch live chat messages for a YouTube video ID.

    Args:
        video_id (str): the video ID, i.e. the ID in
            https://www.youtube.com/watch?v=ID

    Returns:
        None. Writes the messages to `<video_id>.json` and
        `<video_id>.csv`.

    Usage: fetch_live_chat.remote('XQhBHnPIsRk')
    """
    print("current ID:", video_id)

    chat = pytchat.create(video_id=video_id)
    chats = []
    keys = ['author', 'message']

    # Poll until the stream's chat ends, collecting one dict per message.
    while chat.is_alive():
        print("chat is alive...")
        for c in chat.get().sync_items():
            print(f"{c.datetime} [{c.author.name}]- {c.message}")
            chats.append({'author': c.author.name, 'message': c.message})

    with open(f"{video_id}.json", "w") as fout:
        json.dump(chats, fout)

    with open(f"{video_id}.csv", "w", newline='') as file_out:
        dict_writer = csv.DictWriter(file_out, keys)
        dict_writer.writeheader()
        dict_writer.writerows(chats)
|
|
def read_from_json(video_id):
    """Load previously scraped chat messages from `<video_id>.json`."""
    with open(f"{video_id}.json") as file_in:
        return json.load(file_in)
|
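# Example round-trip (assumes XQhBHnPIsRk.json was already written by
# fetch_live_chat):
#   chats = read_from_json('XQhBHnPIsRk')
#   print(chats[0]['author'], chats[0]['message'])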
|
|
# Scrape all videos. The fetch_live_chat.remote(url) invocation follows
# Ray's convention for launching remote tasks; ray.get() blocks until
# every task has finished (otherwise the script could exit early).
ray.get([fetch_live_chat.remote(url) for url in urls])
# After this call returns, each video has two files: VIDEO_ID.json and VIDEO_ID.csv.
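# To act on results as individual tasks finish, rather than blocking on
# all of them, ray.wait can be used instead (sketch):
#   refs = [fetch_live_chat.remote(url) for url in urls]
#   while refs:
#       done, refs = ray.wait(refs)  # pops one finished task per loop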
|
|
|
# If some videos were already scraped, save their IDs in an `ignore_urls`
# set and run this instead of the ray.get call above:
#
#   ignore_urls = set()  # IDs of already-scraped videos
#   ray.get([fetch_live_chat.remote(url) for url in urls
#            if url not in ignore_urls])