Created
January 26, 2023 17:21
-
-
Save nikitastaf1996/d61b76edb918c72a1199e89561ae9330 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import re | |
| import requests | |
| from bs4 import BeautifulSoup | |
| from typing import Optional, Callable | |
| from gpt_index.langchain_helpers.text_splitter import TokenTextSplitter | |
| from gpt_index import Document,GPTListIndex | |
# Patterns for the "url:[...] - query:[...]" tool-input format.
# The URL capture is non-greedy so it stops at the first "]"; a greedy capture
# runs on to the query's closing bracket and swallows "] - query:[...".
_URL_PATTERN = re.compile(r"url:\[(.*?)\]")
# The query comes last in the documented format, so it stays greedy and may
# itself contain "]" characters.
_QUERY_PATTERN = re.compile(r"query:\[(.*)\]")

# Upper bound (seconds) on the page download so a stalled server cannot hang
# the caller forever.
_REQUEST_TIMEOUT_SECONDS = 30


def _parse_url_and_query(url_and_query):
    """Split a tool-input string into its ``(url, query)`` parts.

    Raises:
        ValueError: if either the ``url:[...]`` or the ``query:[...]`` part
            cannot be found in *url_and_query*.
    """
    url_match = _URL_PATTERN.search(url_and_query)
    query_match = _QUERY_PATTERN.search(url_and_query)
    if url_match is None or query_match is None:
        raise ValueError(
            "Expected input of the form 'url:[<address>] - query:[<question>]', "
            f"got: {url_and_query!r}"
        )
    return url_match.group(1), query_match.group(1)


def runquery(url_and_query):
    """Answer a question about the text content of a web page.

    Args:
        url_and_query: A string containing ``url:[<address>]`` followed by
            ``query:[<question>]``, e.g.
            ``"url:[https://www.google.com] - query:[Hello]"``.

    Returns:
        The index's answer to the query, converted to ``str``.

    Raises:
        ValueError: if *url_and_query* does not contain both bracketed parts.
    """
    # Extract url and query; this fails fast, before any network access.
    url, query = _parse_url_and_query(url_and_query)

    # Download the page and reduce it to its visible text.
    page = requests.get(url, timeout=_REQUEST_TIMEOUT_SECONDS)
    soup = BeautifulSoup(page.text, "html.parser")
    text = soup.get_text()

    # Split the page text into token-bounded chunks and insert each one into
    # a list index.
    text_splitter = TokenTextSplitter(separator=" ", chunk_size=2048, chunk_overlap=20)
    index = GPTListIndex([])
    for chunk in text_splitter.split_text(text):
        index.insert(Document(chunk))

    # Ask the index the question and return the answer as plain text.
    answer = index.query(query)
    return str(answer)
# Expose runquery as a single tool named "query". The description doubles as
# the usage instructions for the action-input format that runquery parses.
# NOTE(review): neither `Tool` nor `agent` is defined or imported in the
# visible part of this file -- presumably langchain's Tool class and an
# already-initialised agent built from `tools`; confirm before running.
tools = [
    Tool(
        name="query",
        func=runquery,
        description="You can use this tool to ask questions about content of webpage.Please use this exact format for action input url:[https://www.google.com] - query:[Hello]",
    ),
]

# Example run: ask the agent what a news article is about.
agent.run(
    "What is this article about https://abcnews.go.com/Health/us-reels-multiple-mass-shootings-loneliness-trigger-violence/story?id=96632046"
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment