Created
December 30, 2022 04:44
-
-
Save shellward/25536e715e82ce52ec6fbf583c0cb570 to your computer and use it in GitHub Desktop.
Playing around with ChatGPT, TTS, Gradio, and Selenium
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # You need to install the following packages | |
| # !pip3 install --upgrade revChatGPT | |
| # !pip3 install selenium | |
| # !pip3 install gtts | |
| # !pip3 install gradio | |
| # !pip3 install ipython | |
| # !pip3 install numpy | |
| # I really ought to learn how to properly architect Python applications | |
| # instead of putting everything in one file like this. | |
| # If you have a good resource, send it my way :) | |
| # - @Shellworld1 | |
| # Imports | |
| import base64 | |
| import time | |
| import re | |
| from selenium import webdriver | |
| from selenium.webdriver.common.keys import Keys | |
| import time | |
| from revChatGPT.ChatGPT import Chatbot | |
| from selenium.webdriver.common.by import By | |
| from revChatGPT.ChatGPT import EC, WebDriverWait | |
| from IPython.display import HTML | |
| from gtts import gTTS | |
| import gradio as gr | |
| import numpy as np | |
| import os | |
# The ChatGPT session token is read from the environment when available so the
# secret does not have to live in source control. The original placeholder text
# is kept as the fallback, so pasting a token directly here still works.
# Optionally, python-dotenv can populate the environment from a .env file first:
#     from dotenv import load_dotenv
#     load_dotenv()
session_token = os.environ.get(
    "session_token",
    "Enter your session token from your OpenAI Auth0 cookie here",
)

# Establish chatbot
chatbot = Chatbot({"session_token": session_token}, conversation_id=None, parent_id=None)

# Create the StaticFiles directory if it doesn't exist. exist_ok=True replaces
# the separate os.path.exists() check, which could race with another process
# creating the directory between the check and the makedirs() call.
os.makedirs("StaticFiles", exist_ok=True)
def ask_twitter(topic):
    """
    ask_twitter:
    Open Twitter's live search for a topic in Chrome, scroll to load results,
    and return the text of the tweets found.
    @param topic {string} : the topic to search for on twitter
    @return: paragraphs {list} : tweet texts with URLs, hashtags and
             @-mentions removed
    @calls: scroll_and_wait_for_tweets_in_steps, driver.find_elements, re.sub, driver.quit
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import quote_plus

    driver = webdriver.Chrome()
    try:
        # Percent-encode the topic so characters such as '&', '#' or '+' stay
        # part of the search term instead of altering the query string.
        driver.get("https://twitter.com/search?q=" + quote_plus(topic) + "&f=live")
        driver = scroll_and_wait_for_tweets_in_steps(driver, 34000, 2, 4000)
        elements = driver.find_elements(By.XPATH, "//div[@data-testid='tweetText']")
        # Read the text while the browser session is still alive.
        paragraphs = [element.text for element in elements]
    finally:
        # Always release the browser, even when loading or scraping fails.
        # quit() ends the whole WebDriver session, whereas close() only closes
        # the current window.
        driver.quit()

    # Strip urls, hashtags and mentions, in the same order as before.
    for pattern in (r'http\S+', r'#\S+', r'@\S+'):
        paragraphs = [re.sub(pattern, '', paragraph) for paragraph in paragraphs]
    return paragraphs
def scroll_and_wait(driver, scroll_height, wait_time):
    """
    scroll_and_wait:
    Scroll the window to a vertical offset, then pause.
    @param driver {webdriver} : the webdriver to use
    @param scroll_height {int} : the vertical offset passed to window.scrollTo
    @param wait_time {int} : how long to sleep after scrolling, in seconds
    @return: driver {webdriver} : the same webdriver that was passed in
    @calls: driver.execute_script, time.sleep
    """
    scroll_script = f"window.scrollTo(0, {scroll_height})"
    driver.execute_script(scroll_script)
    # Pause so the page has time to react to the scroll before the caller continues.
    time.sleep(wait_time)
    return driver
def scroll_and_wait_for_tweets_in_steps(driver, scroll_height, wait_time=2, step_size=10000):
    """
    scroll_and_wait_for_tweets_in_steps:
    Scroll down the page in fixed-size steps, pausing after each step and
    waiting for tweet text elements to be present.
    @param driver {webdriver} : the webdriver to use
    @param scroll_height {int} : the offset at which stepping stops (exclusive)
    @param wait_time {int} : the amount of time to sleep after each scroll
    @param step_size {int} : the amount to scroll each step
    @return: driver {webdriver} : the webdriver after scrolling and waiting
    @calls: scroll_and_wait, WebDriverWait, EC.presence_of_all_elements_located, By.XPATH
    """
    tweet_locator = (By.XPATH, "//div[@data-testid='tweetText']")
    for offset in range(0, scroll_height, step_size):
        driver = scroll_and_wait(driver, offset, wait_time)
        # NOTE(review): the original indentation was lost; this assumes the
        # explicit wait runs after every scroll step -- confirm.
        # Wait up to 25 seconds for tweet text elements to be present.
        WebDriverWait(driver, 25).until(EC.presence_of_all_elements_located(tweet_locator))
    return driver
def write_an_article(topic, existing_text="", tweets=None):
    """
    write_an_article:
    Ask the chatbot to write (or continue) a short story built from tweets
    about a topic.
    @param topic {string} : the topic to search for on twitter
    @param existing_text {string} : the existing text to build off of
    @param tweets {list} : tweets to build off of; when None or empty, they are
           scraped with ask_twitter
    @return: final_answer_text {string} : the chatbot's answer text
    @return: tweets {list} : the list of tweets that was used
    @return: conv_id {string} : the conversation id
    @return: parent_id {string} : the parent id
    @calls: ask_twitter, chatbot.ask
    """
    # None replaces the former mutable default ([]); both None and an empty
    # list mean "no tweets supplied yet", so scrape them.
    if not tweets:
        tweets = ask_twitter(topic)
    prompt = f"""
You are writing a short story building story lines from the tweet list below. Do not mention that the story is being built from tweets.
Tweet List:
{tweets}
The story so far:
{existing_text}
Your Response:
"""
    response = chatbot.ask(prompt, conversation_id=None, parent_id=None)
    # Relies on the response holding exactly three values in the order
    # (answer text, conversation id, parent id) -- TODO confirm against the
    # installed revChatGPT version.
    final_answer_text, conv_id, parent_id = response.values()
    return final_answer_text, tweets, conv_id, parent_id
def iterate_on_article(topic, iterations=2):
    """
    iterate_on_article:
    Write an initial story about a topic, then extend it a number of times,
    feeding the accumulated story back in on each round.
    @param topic {string} : the topic to search for on twitter
    @param iterations {int} : the number of continuation rounds to run after
           the initial story
    @return: story_so_far {string} : the initial story plus all continuations
    @calls: write_an_article
    """
    # The conversation and parent ids are deliberately discarded: they appear
    # to be tracked automatically based on the session token.
    story_so_far, tweets, _conv_id, _parent_id = write_an_article(topic)
    for _ in range(iterations):
        # Reuse the tweets returned by the previous call so Twitter is only
        # scraped once.
        continuation, tweets, _conv_id, _parent_id = write_an_article(topic, story_so_far, tweets)
        story_so_far += continuation
    return story_so_far
def turn_article_into_a(story_so_far, transform="Beckett play"):
    """
    turn_article_into_a:
    Ask the chatbot to convert an article into another form.
    @param story_so_far {string} : the article text to convert
    @param transform {string} : the form to convert the article into
    @return: final_answer_text {string} : the chatbot's answer text
    @calls: chatbot.ask
    """
    prompt = f"""
You are converting the following article into a {transform}.
The story so far:
{story_so_far}
Your Response:
"""
    reply = chatbot.ask(prompt, conversation_id=None, parent_id=None)
    # Only the first of the three response values (the answer text) is used.
    converted_text, _conversation_id, _parent_id = reply.values()
    return converted_text
def continue_a(story_so_far, transform="Beckett play"):
    """
    continue_a:
    Ask the chatbot to continue an existing work of the given form.
    @param story_so_far {string} : the work written so far
    @param transform {string} : the form of the work being continued
    @return: final_answer_text {string} : the chatbot's answer text
    @calls: chatbot.ask
    """
    prompt = f"""
You are continuing the following {transform}.
The story so far:
{story_so_far}
Your Response:
"""
    reply = chatbot.ask(prompt, conversation_id=None, parent_id=None)
    # The conversation id and parent id are unpacked but not returned.
    continuation_text, _conversation_id, _parent_id = reply.values()
    return continuation_text
def write_article_into_a(topic, iterations=2, transform="Beckett play"):
    """
    write_article_into_a:
    Compose an article about a topic and convert it into the requested form.
    @param topic {string} : the topic to search for on twitter
    @param iterations {int} : the number of iterations to run when composing
    @param transform {string} : the form to convert the article into
    @return: final_answer_text {string} : the converted text
    @calls: iterate_on_article, turn_article_into_a
    """
    article = iterate_on_article(topic, iterations)
    converted = turn_article_into_a(article, transform)
    return converted
def iterate_on_a(dialog, iterations=2, transform="Beckett play"):
    """
    iterate_on_a:
    Extend an already-composed work by repeatedly asking for a continuation.
    @param dialog {string} : the work to continue
    @param iterations {int} : the number of continuations to append
    @param transform {string} : the form of the work being continued
    @return: story_so_far {string} : the original work plus all continuations
    @calls: continue_a
    """
    story_so_far = dialog
    for _ in range(iterations):
        # Each round sees everything written so far, including earlier continuations.
        story_so_far = story_so_far + continue_a(story_so_far, transform)
    return story_so_far
def complete_scrape_and_transform(topic, iterations=2, transform="Beckett play"):
    """
    complete_scrape_and_transform:
    Build a transformed article for a topic and then continue it.
    @param topic {string} : the topic to search for on twitter
    @param iterations {int} : the number of iterations used for both composing
           and continuing
    @param transform {string} : the form to convert into and continue
    @return: story_so_far {string} : the transformed and continued text
    @calls: write_article_into_a, iterate_on_a
    """
    return iterate_on_a(
        write_article_into_a(topic, iterations, transform),
        iterations,
        transform,
    )
def display_and_produce_tts(result, twitter_topic):
    """
    display_and_produce_tts:
    Build an HTML page for the generated text and save a text-to-speech mp3
    of it under StaticFiles/.
    @param result {string} : the generated text to display and speak
    @param twitter_topic {string} : the topic, used as the page heading and as
           the basis of the mp3 file name
    @calls: gTTS, save
    @return: html {string} : the html to display
    @return: audio {string} : path of the saved mp3 file
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    # Both values derive from user input and scraped/generated text, so escape
    # them before building markup. Escaping happens before the <br> insertion
    # so the line breaks themselves stay real tags.
    result_display = escape(result).replace("\n", "<br>")
    topic_display = escape(twitter_topic)

    # Spaces become underscores exactly as before. Every other character that
    # is not a word character or '-' (path separators, dots, quotes, ...) is
    # replaced too, so the topic cannot point outside StaticFiles or produce
    # an invalid file name.
    twitter_topic_underscore = re.sub(r"[^\w-]", "_", twitter_topic.replace(" ", "_")) or "untitled"
    audio = f"StaticFiles/{twitter_topic_underscore}.mp3"

    tts = gTTS(result, lang='en')
    tts.save(audio)

    html = f"""
<html>
<body>
<div style="width: 100%; height: 100%; overflow: scroll;">
<h1>{topic_display}</h1>
<p>{result_display}</p>
</div>
</body>
</html>
"""
    # Left out of the page because the audio is displayed by gradio rather
    # than served with flask:
    # <audio controls>
    # <source src="{twitter_topic_underscore}.mp3" type="audio/mpeg">
    # Your browser does not support the audio element.
    # </audio>
    # Saving the html so it can be served is optional:
    # with open(f"StaticFiles/{twitter_topic_underscore}.html", "w") as f:
    #     f.write(html)
    return html, audio
def news(twitter_topic, iterations, transform_into):
    """
    news:
    Gradio callback: write a transformed article about a topic and produce
    its HTML and text-to-speech audio.
    @param twitter_topic {string} : the topic to search for on twitter
    @param iterations {string} : the number of iterations to run; blank means 1
    @param transform_into {string} : the form to transform the article into;
           blank selects a radio news piece
    @return: html {string} : the html to display
    @return: audio {string} : the audio value returned by display_and_produce_tts
    @calls: write_article_into_a, display_and_produce_tts
    @raises: ValueError when iterations is non-blank text that is not an integer
    """
    # Treat missing and whitespace-only input like an empty box instead of
    # letting int() fail on it.
    if isinstance(iterations, str):
        iterations = iterations.strip()
    if iterations is None or iterations == "":
        iterations = 1
    else:
        iterations = int(iterations)

    if transform_into is None or (isinstance(transform_into, str) and not transform_into.strip()):
        transform_into = "three minute long radio news piece covering all the information in the article"

    result = write_article_into_a(twitter_topic, iterations, transform_into)
    html, audio = display_and_produce_tts(result, twitter_topic)
    return html, audio
# Gradio interface: three text inputs feed news(); its HTML and audio file path
# are shown as the two outputs.
interface = gr.Interface(
    fn=news,
    inputs=[
        gr.inputs.Textbox(lines=1, label="Twitter Topic", default="Heinz Ketchup", type="text"),
        gr.inputs.Textbox(lines=1, label="Iterations", default="1", type="text"),
        gr.inputs.Textbox(
            lines=1,
            label="Transform Into",
            default="three minute long radio news piece covering all the information in the article, covered by a randomly named reporter working for an undisclosed media organization",
            type="text",
        ),
    ],
    outputs=[
        gr.outputs.HTML(label="Result"),
        gr.outputs.Audio(label="Audio", type="filepath"),
    ],
    title="RecentTweetsToSpeech",
    description="Generate text-to-speech from the latest twitter data regarding a topic. You can also transform the text into a different form, such as a play, song or poem.",
    allow_flagging=False,
    examples=[
        ["Heinz Ketchup", "1", "three minute long radio news piece covering all the information in the article"],
        ["Pokemon Snap", "1", "article rewritten as a lyrical poem in the style of David Byrne"],
    ],
)

# Serve the application
# NOTE(review): the login credentials are hard-coded here while share=True is
# set -- consider moving them out of the source.
interface.launch(
    inline=True,
    share=True,
    auth=("admin", "digimonOtis"),
    enable_queue=True,
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment