nikitastaf1996 · January 26, 2023 17:21
diff --git a/Using GPT Index to ask questions about webpages.py b/Using GPT Index to ask questions about webpages.py
 import re
 import requests
 from bs4 import BeautifulSoup
 from typing import Optional, Callable
 from gpt_index.langchain_helpers.text_splitter import TokenTextSplitter
 from gpt_index import Document,GPTListIndex

 def runquery(url_and_query):
     """Answer a question about a webpage using a GPT list index.

     The input is one string in the form
     ``url:[https://www.google.com] - query:[Hello]`` (URL section first).
     The page is downloaded, its text is extracted with BeautifulSoup,
     split into token chunks, each chunk is inserted into a
     ``GPTListIndex``, and the index is queried with the question.

     Args:
         url_and_query: Combined action input containing a ``url:[...]``
             section followed by a ``query:[...]`` section.

     Returns:
         The index's response to the query, converted to ``str``.

     Raises:
         ValueError: If the input lacks a ``url:[...]`` or a
             ``query:[...]`` section.
     """
     # The URL capture is non-greedy so it stops at the first "]". A greedy
     # ".*" runs to the LAST "]" and swallows "] - query:[..." into the URL.
     url_match = re.search(r"url:\[(.*?)\]", url_and_query)
     # The query capture stays greedy so the question itself may contain "]".
     query_match = re.search(r"query:\[(.*)\]", url_and_query)
     if url_match is None or query_match is None:
         raise ValueError(
             "Expected input in the form 'url:[<address>] - query:[<question>]', "
             f"got: {url_and_query!r}"
         )
     url = url_match.group(1).strip()
     query = query_match.group(1).strip()

     # Download the page and extract its text. Without a timeout,
     # requests.get can block indefinitely on an unresponsive server.
     page = requests.get(url, timeout=30)
     soup = BeautifulSoup(page.text, "html.parser")
     text = soup.get_text()

     # Split the page text into chunks and put each chunk into the index.
     text_splitter = TokenTextSplitter(separator=" ", chunk_size=2048, chunk_overlap=20)
     index = GPTListIndex([])
     for text_chunk in text_splitter.split_text(text):
         index.insert(Document(text_chunk))

     # Ask the index the question and hand the answer back as plain text.
     answer = index.query(query)
     return str(answer)

 # Register runquery as the single tool, named "query", offered to the agent.
 # NOTE(review): `Tool` and `agent` are not defined in this snippet;
 # presumably they come from LangChain setup code elsewhere - confirm.
 tools = [
     Tool(
         func=runquery,
         name="query",
         description=(
             "You can use this tool to ask questions about content of webpage.Please use this exact format for action input url:[https://www.google.com] - query:[Hello]"
         ),
     ),
 ]

 # Example run: ask the agent what a news article is about.
 agent.run(
     "What is this article about https://abcnews.go.com/Health/us-reels-multiple-mass-shootings-loneliness-trigger-violence/story?id=96632046"
 )
	import re
	import requests
	from bs4 import BeautifulSoup
	from typing import Optional, Callable
	from gpt_index.langchain_helpers.text_splitter import TokenTextSplitter
	from gpt_index import Document,GPTListIndex

	def runquery(url_and_query):
	    """Answer a question about a webpage using a GPT list index.

	    The input is one string in the form
	    ``url:[https://www.google.com] - query:[Hello]`` (URL section first).
	    The page is downloaded, its text is extracted with BeautifulSoup,
	    split into token chunks, each chunk is inserted into a
	    ``GPTListIndex``, and the index is queried with the question.

	    Args:
	        url_and_query: Combined action input containing a ``url:[...]``
	            section followed by a ``query:[...]`` section.

	    Returns:
	        The index's response to the query, converted to ``str``.

	    Raises:
	        ValueError: If the input lacks a ``url:[...]`` or a
	            ``query:[...]`` section.
	    """
	    # The URL capture is non-greedy so it stops at the first "]". A greedy
	    # ".*" runs to the LAST "]" and swallows "] - query:[..." into the URL.
	    url_match = re.search(r"url:\[(.*?)\]", url_and_query)
	    # The query capture stays greedy so the question itself may contain "]".
	    query_match = re.search(r"query:\[(.*)\]", url_and_query)
	    if url_match is None or query_match is None:
	        raise ValueError(
	            "Expected input in the form 'url:[<address>] - query:[<question>]', "
	            f"got: {url_and_query!r}"
	        )
	    url = url_match.group(1).strip()
	    query = query_match.group(1).strip()

	    # Download the page and extract its text. Without a timeout,
	    # requests.get can block indefinitely on an unresponsive server.
	    page = requests.get(url, timeout=30)
	    soup = BeautifulSoup(page.text, "html.parser")
	    text = soup.get_text()

	    # Split the page text into chunks and put each chunk into the index.
	    text_splitter = TokenTextSplitter(separator=" ", chunk_size=2048, chunk_overlap=20)
	    index = GPTListIndex([])
	    for text_chunk in text_splitter.split_text(text):
	        index.insert(Document(text_chunk))

	    # Ask the index the question and hand the answer back as plain text.
	    answer = index.query(query)
	    return str(answer)

	# Register runquery as the single tool, named "query", offered to the agent.
	# NOTE(review): `Tool` and `agent` are not defined in this snippet;
	# presumably they come from LangChain setup code elsewhere - confirm.
	tools = [
	    Tool(
	        func=runquery,
	        name="query",
	        description=(
	            "You can use this tool to ask questions about content of webpage.Please use this exact format for action input url:[https://www.google.com] - query:[Hello]"
	        ),
	    ),
	]

	# Example run: ask the agent what a news article is about.
	agent.run(
	    "What is this article about https://abcnews.go.com/Health/us-reels-multiple-mass-shootings-loneliness-trigger-violence/story?id=96632046"
	)
No results found