Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Select an option

  • Save nikitastaf1996/d61b76edb918c72a1199e89561ae9330 to your computer and use it in GitHub Desktop.

Select an option

Save nikitastaf1996/d61b76edb918c72a1199e89561ae9330 to your computer and use it in GitHub Desktop.
import re
import requests
from bs4 import BeautifulSoup
from typing import Optional, Callable
from gpt_index.langchain_helpers.text_splitter import TokenTextSplitter
from gpt_index import Document,GPTListIndex
def _parse_url_and_query(url_and_query):
    """Split a ``url:[...] - query:[...]`` tool input into ``(url, query)``.

    Raises:
        ValueError: if either marker is missing or the URL is empty.
    """
    # The URL capture stops at the first "]" so it cannot swallow the
    # " - query:[...]" part that follows it in the same string.
    url_match = re.search(r"url:\[([^\]]*)\]", url_and_query)
    # The query capture stays greedy (up to the last "]") so a question that
    # itself contains square brackets is kept intact.
    query_match = re.search(r"query:\[(.*)\]", url_and_query)
    if url_match is None or query_match is None:
        raise ValueError(
            "Expected input in the form url:[<address>] - query:[<question>], "
            f"got: {url_and_query!r}"
        )

    # Surrounding whitespace is never meaningful in either field.
    url = url_match.group(1).strip()
    query = query_match.group(1).strip()
    if not url:
        raise ValueError(f"Empty url in tool input: {url_and_query!r}")
    return url, query


def runquery(url_and_query):
    """Answer a question about the content of a web page.

    Args:
        url_and_query: A single string in the form
            ``url:[<address>] - query:[<question>]``.

    Returns:
        The index's response to the question, converted to ``str``.

    Raises:
        ValueError: if ``url_and_query`` does not contain both a ``url:[...]``
            and a ``query:[...]`` part, or the URL is empty.
    """
    # Extract url and query.
    url, query = _parse_url_and_query(url_and_query)

    # Download the page and extract its text. Without a timeout a stalled
    # server would block the caller indefinitely.
    page = requests.get(url, timeout=30)
    soup = BeautifulSoup(page.text, "html.parser")
    text = soup.get_text()

    # Split the page text into chunks and put each chunk into the index.
    text_splitter = TokenTextSplitter(separator=" ", chunk_size=2048, chunk_overlap=20)
    index = GPTListIndex([])
    for chunk in text_splitter.split_text(text):
        index.insert(Document(chunk))

    # Get the response from the index.
    answer = index.query(query)
    return str(answer)
# Tool registry: a single "query" tool whose implementation is runquery.
# The description spells out the exact input format runquery parses.
# NOTE(review): `Tool` and `agent` are not defined or imported in the visible
# part of this file - presumably they are set up elsewhere; confirm.
tools = [
    Tool(
        name="query",
        func=runquery,
        description=(
            "You can use this tool to ask questions about content of webpage."
            "Please use this exact format for action input "
            "url:[https://www.google.com] - query:[Hello]"
        ),
    ),
]

# Example run: ask the agent what a news article is about.
agent.run(
    "What is this article about "
    "https://abcnews.go.com/Health/us-reels-multiple-mass-shootings-loneliness-trigger-violence/story?id=96632046"
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment