Skip to content

Instantly share code, notes, and snippets.

@JarbasAl
Created April 10, 2025 15:28
Show Gist options
  • Select an option

  • Save JarbasAl/fdf3dce918ac29120dd5f90eb331d158 to your computer and use it in GitHub Desktop.

Select an option

Save JarbasAl/fdf3dce918ac29120dd5f90eb331d158 to your computer and use it in GitHub Desktop.
import requests
# Prompt template sent to the LLM for every (utterance, triple) pair.
# It is filled in with `str.format` and has exactly two placeholders,
# {utterance} and {triple}. The model is instructed to reply with the single
# word True or False, which the caller compares case-insensitively.
TRIPLE_VALIDATION_PROMPT = """
You are a triple validator for a personal knowledge graph.
Given an utterance that a user spoke to a voice assistant and a candidate triple, your task is to validate the triple
Utterances about the user usually have the form of "I am ...." or "My ..."
Utterances about the assistant usually have the form of "You are ...." or "Your ..."
Knowledge about the broader world should be discarded, you are only interested in personal information about the user or the voice assistant
Each triple is in the format:
(subject, predicate, object)
Only return 'True' if:
- The subject is 'self' (the assistant) or 'user' (the user)
- The triple is about user or assistant personal information
- The triple is factually plausible and makes sense
- The triple DOES NOT contradict the utterance
Otherwise, return 'False'.
Examples of valid triples:
"my favorite color is green" - ("user", "schema:favoriteColor", "green")
"your favorite color is blue" - ("self", "schema:favoriteColor", "blue")
Examples of invalid triples:
"my favorite color is green" - ("user", "schema:favoriteColor", "red")
"I love the color green" - ("self", "schema:favoriteColor", "green")
"your favorite color is blue" - ("user", "schema:favoriteColor", "blue")
YOU MUST answer with only one word: True or False.
The user said: "{utterance}"
Candidate triple: {triple}
"""
def validate_triple_ollama(utterance, triple, model="gemma2",
                           url="http://100.88.41.41:11434/api/generate",
                           timeout=300):
    """Ask an Ollama-hosted model whether ``triple`` is valid for ``utterance``.

    The utterance and the ``repr`` of the triple are substituted into
    ``TRIPLE_VALIDATION_PROMPT`` and posted to the Ollama generate endpoint.

    Args:
        utterance: Text the user spoke to the assistant.
        triple: Candidate ``(subject, predicate, object)`` tuple.
        model: Name of the Ollama model to query.
        url: Full URL of the Ollama ``/api/generate`` endpoint. Defaults to
            the address that was previously hard-coded.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.post``. ``None`` waits indefinitely.

    Returns:
        ``True`` if the model's (stripped, case-insensitive) reply is exactly
        ``"true"``; ``False`` for any other reply.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failure or timeout.
    """
    prompt = TRIPLE_VALIDATION_PROMPT.format(utterance=utterance,
                                             triple=repr(triple))
    response = requests.post(
        url,
        json={
            "model": model,
            "prompt": prompt,
            "stream": False,
            "options": {
                # Deterministic, minimal output: only a one-word verdict
                # is wanted, so keep sampling greedy and the reply short.
                "temperature": 0.0,
                "num_predict": 1,
                "stop": ["\n"]
            }
        },
        # Without a timeout a stalled server would block the caller forever.
        timeout=timeout,
    )
    # Surface HTTP errors (e.g. unknown model) explicitly instead of failing
    # later with a KeyError on the missing "response" field.
    response.raise_for_status()
    result = response.json()["response"].strip()
    return result.lower() == "true"
if __name__ == "__main__":
    # Benchmark: run every labelled (utterance, triple) sample through each
    # model and report how often the validator agrees with the label.

    # Samples the validator is expected to accept.
    valid = [
        # favorite color
        ("my favorite color is green", ("user", "schema:favoriteColor", "green")),
        ("your favorite color is green", ("self", "schema:favoriteColor", "green")),
        # simple attribute statements
        ("I am a software engineer", ("user", "schema:occupation", "software engineer")),
        ("You are an assistant", ("self", "schema:occupation", "assistant")),
        ("my name is John", ("user", "schema:name", "John")),
        ("your name is Ollama", ("self", "schema:name", "Ollama")),
        ("I live in New York", ("user", "schema:homeLocation", "New York")),
        ("You are located in the cloud", ("self", "schema:homeLocation", "cloud")),
        ("my birthday is January 1st", ("user", "schema:birthDate", "January 1st")),
        ("your favorite food is pizza", ("self", "schema:favoriteFood", "pizza")),
        # paraphrased attribute statements
        ("I am a doctor", ("user", "schema:occupation", "doctor")),
        ("You are a virtual assistant", ("self", "schema:occupation", "virtual assistant")),
        ("My name is Alice", ("user", "schema:name", "Alice")),
        ("Your name is Siri", ("self", "schema:name", "Siri")),
        ("I reside in London", ("user", "schema:homeLocation", "London")),
        ("You are based in the cloud", ("self", "schema:homeLocation", "cloud")),
        ("My birthdate is December 25th", ("user", "schema:birthDate", "December 25th")),
        ("Your favorite drink is coffee", ("self", "schema:favoriteFood", "coffee")),
        ("I am a writer", ("user", "schema:occupation", "writer")),
        ("Your favorite color is red", ("self", "schema:favoriteColor", "red")),
        # longer sentences
        ("I have been working as an engineer for 10 years", ("user", "schema:occupation", "engineer")),
        ("You have been active since 2020", ("self", "schema:activeSince", "2020")),
        ("My favorite sport is basketball", ("user", "schema:favoriteSport", "basketball")),
        ("Your primary role is to assist users", ("self", "schema:primaryRole", "assist users")),
        ("I celebrate my birthday every year on October 15th", ("user", "schema:birthDate", "October 15th")),
        # less direct phrasing
        ("I love coding", ("user", "schema:favoriteHobby", "coding")),
        ("You help people with their tasks", ("self", "schema:primaryRole", "help people")),
        ("My city of residence is Tokyo", ("user", "schema:homeLocation", "Tokyo")),
        ("Your preferred language is Python", ("self", "schema:preferredLanguage", "Python")),
        # negations
        ("I am not interested in sports", ("user", "schema:interestInSports", "no")),
        ("You are not programmed to have preferences", ("self", "schema:preferences", "none")),
    ]

    # Samples the validator is expected to reject: the same utterances paired
    # with the wrong subject (or, in the color group, also the wrong object).
    invalid = [
        # favorite color
        ("my favorite color is green", ("user", "schema:favoriteColor", "red")),
        ("my favorite color is green", ("self", "schema:favoriteColor", "green")),
        ("your favorite color is green", ("user", "schema:favoriteColor", "green")),
        ("your favorite color is green", ("self", "schema:favoriteColor", "red")),
        # simple attribute statements
        ("I am a software engineer", ("self", "schema:occupation", "software engineer")),
        ("You are an assistant", ("user", "schema:occupation", "assistant")),
        ("my name is John", ("self", "schema:name", "John")),
        ("your name is Ollama", ("user", "schema:name", "Ollama")),
        ("I live in New York", ("self", "schema:homeLocation", "New York")),
        ("You are located in the cloud", ("user", "schema:homeLocation", "cloud")),
        ("my birthday is January 1st", ("self", "schema:birthDate", "January 1st")),
        ("your favorite food is pizza", ("user", "schema:favoriteFood", "pizza")),
        # paraphrased attribute statements
        ("I am a doctor", ("self", "schema:occupation", "doctor")),
        ("You are a virtual assistant", ("user", "schema:occupation", "virtual assistant")),
        ("My name is Alice", ("self", "schema:name", "Alice")),
        ("Your name is Siri", ("user", "schema:name", "Siri")),
        ("I reside in London", ("self", "schema:homeLocation", "London")),
        ("You are based in the cloud", ("user", "schema:homeLocation", "cloud")),
        ("My birthdate is December 25th", ("self", "schema:birthDate", "December 25th")),
        ("Your favorite drink is coffee", ("user", "schema:favoriteFood", "coffee")),
        ("I am a writer", ("self", "schema:occupation", "writer")),
        ("Your favorite color is red", ("user", "schema:favoriteColor", "red")),
        # longer sentences
        ("I have been working as an engineer for 10 years", ("self", "schema:occupation", "engineer")),
        ("You have been active since 2020", ("user", "schema:activeSince", "2020")),
        ("My favorite sport is basketball", ("self", "schema:favoriteSport", "basketball")),
        ("Your primary role is to assist users", ("user", "schema:primaryRole", "assist users")),
        ("I celebrate my birthday every year on October 15th", ("self", "schema:birthDate", "October 15th")),
        # less direct phrasing
        ("I love coding", ("self", "schema:favoriteHobby", "coding")),
        ("You help people with their tasks", ("user", "schema:primaryRole", "help people")),
        ("My city of residence is Tokyo", ("self", "schema:homeLocation", "Tokyo")),
        ("Your preferred language is Python", ("user", "schema:preferredLanguage", "Python")),
        # negations
        ("I am not interested in sports", ("self", "schema:interestInSports", "no")),
        ("You are not programmed to have preferences", ("user", "schema:preferences", "none")),
    ]

    # Models to benchmark, queried in this order.
    MODELS = [
        "tinyllama",
        "gemma3:1b",
        "gemma3",
        "llama3.1",
        "phi4-mini",
        "qwen2.5",
        "command-r7b",
        "glm4",
        "granite3.2",
        "openchat",
        "gemma2",
        "gemma3:12b",
    ]

    # (samples, expected verdict, message printed when the model disagrees).
    # Valid samples are scored first, then invalid ones.
    suites = (
        (valid, True, "❌ Incorrectly assigned as Invalid triple"),
        (invalid, False, "❌ Incorrectly assigned as Valid triple"),
    )

    for model in MODELS:
        print("\n#######################")
        print(f"## Testing model: {model}")
        print()

        hits = misses = false_neg = false_pos = 0
        for samples, expected, complaint in suites:
            for utterance, candidate in samples:
                verdict = bool(validate_triple_ollama(utterance, candidate, model))
                if verdict is expected:
                    hits += 1
                    continue
                # The model disagreed with the label: classify the error.
                misses += 1
                if expected:
                    false_neg += 1
                else:
                    false_pos += 1
                print(utterance, candidate)
                print(complaint)

        print()
        print("Model:", model)
        print("Total correct predictions:", hits)
        print("Total wrong predictions:", misses)
        print(" - false positives:", false_pos)
        print(" - false negatives:", false_neg)
# Model: gemma3:1b
# Total correct predictions: 33
# Total wrong predictions: 31
# - false positives: 28
# - false negatives: 3
#
# Model: tinyllama
# Total correct predictions: 33
# Total wrong predictions: 31
# - false positives: 0
# - false negatives: 31
#
# Model: gemma3
# Total correct predictions: 42
# Total wrong predictions: 22
# - false positives: 22
# - false negatives: 0
#
# Model: phi4-mini
# Total correct predictions: 46
# Total wrong predictions: 18
# - false positives: 17
# - false negatives: 1
#
# Model: glm4
# Total correct predictions: 48
# Total wrong predictions: 16
# - false positives: 15
# - false negatives: 1
#
# Model: command-r7b
# Total correct predictions: 49
# Total wrong predictions: 15
# - false positives: 9
# - false negatives: 6
#
# Model: llama3.1
# Total correct predictions: 50
# Total wrong predictions: 14
# - false positives: 8
# - false negatives: 6
#
# Model: qwen2.5
# Total correct predictions: 50
# Total wrong predictions: 14
# - false positives: 8
# - false negatives: 6
#
# Model: openchat
# Total correct predictions: 54
# Total wrong predictions: 10
# - false positives: 9
# - false negatives: 1
#
# Model: granite3.2
# Total correct predictions: 58
# Total wrong predictions: 6
# - false positives: 4
# - false negatives: 2
#
# Model: gemma2
# Total correct predictions: 61
# Total wrong predictions: 3
# - false positives: 2
# - false negatives: 1
#
# Model: gemma3:12b
# Total correct predictions: 62
# Total wrong predictions: 2
# - false positives: 1
# - false negatives: 1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment