Skip to content

Instantly share code, notes, and snippets.

@gustavopinto
Created May 26, 2025 14:25
Show Gist options
  • Select an option

  • Save gustavopinto/ac3ce47e18109e6881889448acb70c9d to your computer and use it in GitHub Desktop.

Select an option

Save gustavopinto/ac3ce47e18109e6881889448acb70c9d to your computer and use it in GitHub Desktop.
LLM4Devs -- Turma 5
# Placeholder value ("sua-chave-aqui" is Portuguese for "your-key-here"):
# replace it with a real OpenAI API key and never commit the real value.
# NOTE(review): presumably picked up from the environment by the OpenAI
# clients used in the scripts - confirm how this file is loaded.
OPENAI_API_KEY="sua-chave-aqui"
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings
# Query a previously persisted Chroma vector store and print the best matches.

# Embedding model used to embed the query.
# NOTE(review): presumably this must be the same model the "bootcamp" store
# was built with - confirm against the script that populated it.
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

# Open the collection persisted in the local "bootcamp" directory.
db = Chroma(persist_directory="bootcamp", embedding_function=embeddings)

# Retrieve the 3 stored documents most similar to the question.
results = db.similarity_search("What is CDD in programming?", k=3)

for r in results:
    print(r.page_content)
    # Blank lines so consecutive results are visually separated.
    print("\n\n")
from scipy.spatial import distance
from langchain_openai import OpenAIEmbeddings
# Demo: compare the embedding of a short query against a few sample texts
# and print the cosine similarity of each one, in list order.
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

user_query = embeddings.embed_query("CDD")

docs = ["language models", "embeddings", "pizza", "limonada"]
docs_embed = embeddings.embed_documents(docs)

# scipy's `distance.cosine` returns a distance, so `1 - distance` turns it
# back into a similarity (higher means closer to the query).
for doc_embedding in docs_embed:
    print(1 - distance.cosine(user_query, doc_embedding))
import sys
import numpy as np
from scipy.spatial import distance
from langchain_openai import ChatOpenAI
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma
## Hello world against the OpenAI chat API, then a hand-rolled RAG pipeline:
## load a paper, split it into chunks, keep the chunks whose embedding is
## close to the question, and send question + chunks to the LLM.
llm = ChatOpenAI()
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")
# resposta = llm.invoke("Oi, meu nome é Gustavo, qual é o seu?")
# print(resposta.content)

# Download and parse the PDF, then cut it into small overlapping chunks.
loader = PyPDFLoader("https://arxiv.org/pdf/2210.07342")
documentos = loader.load()
splitter = RecursiveCharacterTextSplitter(chunk_size=400, chunk_overlap=20)
chunks = splitter.split_documents(documentos)

# Use the text of the *split chunks*. The previous version iterated over
# `documentos` here, which silently ignored the splitter output above.
chunk_stringao = [chunk.page_content for chunk in chunks]

# One-off step that persists the chunks into the local "bootcamp" Chroma store:
# vectorstore = Chroma.from_texts(
#     texts=chunk_stringao,
#     embedding=embeddings,
#     persist_directory="bootcamp"
# )
# stringao = " ".join(stringao)
# print(stringao)

## What if we hand the CDD paper to the LLM?
user_query = "What is CDD in programming?"
user_query_emb = embeddings.embed_query(user_query)
chunks_relacionados_a_user_query = embeddings.embed_documents(chunk_stringao)

# Minimum cosine similarity for a chunk to be included in the prompt.
LIMIAR_SIMILARIDADE = 0.55

stringao_de_chunks_relacionados = []
for chunk_texto, chunk_numerico in zip(chunk_stringao, chunks_relacionados_a_user_query):
    # `distance.cosine` is a distance; 1 - distance is the similarity.
    similaridade = 1 - distance.cosine(user_query_emb, chunk_numerico)
    if similaridade > LIMIAR_SIMILARIDADE:
        stringao_de_chunks_relacionados.append(chunk_texto)
        print(similaridade, chunk_texto)

# Question, a visible separator, then the selected context. The previous
# version was missing a `+`, so the adjacent literals were merged and the
# dashes were used as the join separator instead of following the question.
prompt = user_query + "------------" + "".join(stringao_de_chunks_relacionados)
resposta_cdd = llm.invoke(prompt)
print(resposta_cdd.content)
scipy
pypdf
langchain
langchain-openai
langchain-community
langchain-chroma
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment