Skip to content

Instantly share code, notes, and snippets.

@nuhmanpk
Created August 2, 2025 16:45
Show Gist options
  • Select an option

  • Save nuhmanpk/a4b652d20294745a9e6a75d983a670d6 to your computer and use it in GitHub Desktop.

Select an option

Save nuhmanpk/a4b652d20294745a9e6a75d983a670d6 to your computer and use it in GitHub Desktop.
RAG (Retrieval-Augmented Generation) demo using LangChain and Ollama.
import streamlit as st
import time
from langchain_community.document_loaders import SeleniumURLLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_ollama import OllamaEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama.llms import OllamaLLM
# Local Ollama model used for BOTH embeddings and answer generation.
model_name = "llama3.2"
# Initialize session state
# setdefault keeps the vector store and last-URL marker alive across
# Streamlit reruns instead of rebuilding them on every interaction.
st.session_state.setdefault("vector_store", InMemoryVectorStore(OllamaEmbeddings(model=model_name)))
st.session_state.setdefault("last_scraped_url", None)
# Chat model instance shared by all question-answering calls below.
llm = OllamaLLM(model=model_name)
# Prompt contract: {question} and {context} are filled in by answer_question().
template = """
You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question.
If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
Question: {question}
Context: {context}
Answer:
"""
prompt = ChatPromptTemplate.from_template(template)
def load_page(url, driver_path="/opt/homebrew/bin/chromedriver"):
    """Scrape the page at *url* with Selenium and return the loaded documents.

    Args:
        url: Address of the page to fetch.
        driver_path: Path to the chromedriver binary. Defaults to the
            Homebrew location the script originally hard-coded, so existing
            callers are unaffected; pass another path on non-macOS setups.

    Returns:
        The list of documents produced by ``SeleniumURLLoader.load()``.
    """
    st.toast("🔍 Scraping content from the URL...")
    loader = SeleniumURLLoader(
        urls=[url],
        executable_path=driver_path,
    )
    documents = loader.load()
    st.toast("✅ Page content scraped.")
    return documents
def split_text(documents):
    """Chunk *documents* into overlapping pieces ready for embedding.

    Chunks are 1000 characters with a 200-character overlap; each chunk
    records its start index in the source document.
    """
    st.toast("🧩 Splitting text into chunks...")
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        add_start_index=True,
    )
    chunks = splitter.split_documents(documents)
    st.toast(f"✅ Split into {len(chunks)} chunks.")
    return chunks
def index_docs(documents):
    """Embed *documents* and add them to the session-scoped vector store."""
    st.toast("🧠 Updating memory with new content...")
    store = st.session_state.vector_store
    store.add_documents(documents)
    st.toast("✅ Memory updated.")
def retrieve_docs(query):
    """Return the vector-store chunks most similar to *query*."""
    st.toast("🔎 Searching memory for relevant context...")
    hits = st.session_state.vector_store.similarity_search(query)
    st.toast(f"✅ Found {len(hits)} relevant pieces of context.")
    return hits
def answer_question(question, context):
    """Run the module-level prompt through the LLM and return its answer.

    *context* is the retrieved text that grounds the answer; both values are
    substituted into the prompt template's {question}/{context} slots.
    """
    st.toast("💬 Thinking...")
    qa_chain = prompt | llm
    response = qa_chain.invoke({"question": question, "context": context})
    return response
# Streamlit UI
st.title("🕷️ AI Web Crawler & Q&A")

url = st.text_input("🌐 Enter a URL to scrape:")
# Fix: the original tested `url.strip()` for truthiness but then compared,
# stored, and scraped the UNSTRIPPED value, so a whitespace-padded URL was
# treated as "new" and handed to Selenium padded. Normalize once up front.
if url:
    url = url.strip()

# Only scrape if new or different URL
if url and url != st.session_state.last_scraped_url:
    try:
        documents = load_page(url)
        chunked_docs = split_text(documents=documents)
        index_docs(chunked_docs)
        st.session_state.last_scraped_url = url  # Remember so reruns skip re-scraping
    except Exception as e:
        # Best-effort: surface the failure in the UI without crashing the app.
        st.toast(f"❌ Failed to load page: {e}")

question = st.chat_input("💬 Ask a question based on the page content:")
if question:
    st.chat_message('user').write(question)
    retrieved_docs = retrieve_docs(question)
    # Join retrieved chunks into one context string for the prompt.
    context = "\n\n".join([doc.page_content for doc in retrieved_docs])
    answer = answer_question(question, context)
    st.chat_message('assistant').write(answer)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment