Skip to content

Instantly share code, notes, and snippets.

@jnorthrup
Created June 3, 2025 15:27
Show Gist options
  • Select an option

  • Save jnorthrup/e803527c45e41b2eb2f13e24c9448b0c to your computer and use it in GitHub Desktop.

Select an option

Save jnorthrup/e803527c45e41b2eb2f13e24c9448b0c to your computer and use it in GitHub Desktop.
import collections
import random
from dataclasses import dataclass
from typing import List, Dict, Optional, Set, Tuple
# Simplified NLTK setup.
# Probes the tokenizer ('punkt') and lemmatizer ('wordnet'/'omw-1.4')
# independently, so the rest of the program can fall back gracefully when
# only part of NLTK is usable.  Module-level results:
#   NLTK_FULLY_AVAILABLE         - True only when BOTH features work
#   nltk_word_tokenize_func      - word_tokenize, or None on fallback
#   nltk_WordNetLemmatizer_class - WordNetLemmatizer, or None on fallback
NLTK_FULLY_AVAILABLE = False
nltk_word_tokenize_func = None  # Renamed to avoid conflict if nltk itself is imported
nltk_WordNetLemmatizer_class = None  # Renamed
try:
    import nltk

    # Check for 'punkt' for tokenization
    try:
        nltk.data.find('tokenizers/punkt')
        from nltk.tokenize import word_tokenize as imported_wt
        nltk_word_tokenize_func = imported_wt
    except Exception:
        print("NLTK 'punkt' (tokenizer) not found or import failed. Basic tokenization will be used.")
        nltk_word_tokenize_func = None

    # Check for 'wordnet' and 'omw-1.4' for lemmatization
    try:
        nltk.data.find('corpora/wordnet')
        nltk.data.find('corpora/omw-1.4')
        from nltk.stem import WordNetLemmatizer as imported_wnl
        nltk_WordNetLemmatizer_class = imported_wnl
    except Exception:
        print("NLTK 'wordnet'/'omw-1.4' (lemmatizer) not found or import failed. Basic lemmatization will be used.")
        nltk_WordNetLemmatizer_class = None

    if nltk_word_tokenize_func and nltk_WordNetLemmatizer_class:
        NLTK_FULLY_AVAILABLE = True
        print("NLTK tokenizer and lemmatizer appear to be available.")
    elif nltk_word_tokenize_func:
        print("NLTK tokenizer available; basic lemmatization will be used.")
    elif nltk_WordNetLemmatizer_class:
        print("NLTK lemmatizer available; basic tokenization will be used.")
    else:
        # `import nltk` succeeded to reach this point, so the module is
        # definitely present; only the data resources failed.  (The original
        # also tested `'nltk' in globals() or 'nltk' in locals()` here, but
        # that check was always True inside this try-block, making its else
        # branch unreachable dead code — removed.)
        print("NLTK module imported, but resources for tokenization/lemmatization are not available. Full fallback.")
except ImportError:
    print("NLTK module not found. Full fallback (basic tokenization and lemmatization).")
# --- Configuration ---
MARKOV_CHAIN_ORDER = 1  # distance (in tokens) between the "from" and "to" state of a transition
BUFFER_CAPACITY = 10  # max beliefs held by the circular belief buffer before eviction
MAX_PRODUCTIONS_TO_RUN = 15  # default cap on production firings per run
TRUTH_VALUE_SCALING_FACTOR = 100.0  # display-only scaling of frequency in NarseseStatement.__str__
@dataclass(frozen=True)
class NarseseTerm:
    """An immutable, hashable Narsese term identified solely by its name."""
    name: str

    def __str__(self) -> str:
        return self.name
@dataclass
class NarseseStatement:
    """A Narsese statement ``(subject predicate object) {f, c}``.

    Identity (``__eq__``/``__hash__``) is based only on the
    subject/predicate/object triple, so two statements with different truth
    values compare equal — this lets the belief buffer match beliefs
    regardless of their current frequency/confidence.
    """
    subject: NarseseTerm
    term_object: NarseseTerm
    predicate: str = "==>"
    frequency: float = 0.0
    confidence: float = 0.0

    def __str__(self) -> str:
        # Frequency is scaled for display only; the stored value is untouched.
        scaled_frequency = self.frequency * TRUTH_VALUE_SCALING_FACTOR
        return f"({self.subject.name} {self.predicate} {self.term_object.name}) {{f={scaled_frequency:.2f}, c={self.confidence:.2f}}}"

    def __hash__(self):
        return hash((self.subject, self.predicate, self.term_object))

    def __eq__(self, other):
        if not isinstance(other, NarseseStatement):
            return NotImplemented
        return (self.subject, self.predicate, self.term_object) == (
            other.subject, other.predicate, other.term_object)
class TextProcessor:
    """Tokenizes and lemmatizes raw text, preferring NLTK when available.

    Reads the module-level NLTK probe results at construction time and
    falls back to ``str.split`` / identity lemmatization when NLTK (or one
    of its resources) is missing or fails at runtime.
    """

    def __init__(self):
        self.lemmatizer_instance = None
        self._use_nltk_tokenizer = False
        if NLTK_FULLY_AVAILABLE and nltk_WordNetLemmatizer_class:
            try:
                self.lemmatizer_instance = nltk_WordNetLemmatizer_class()
                # Full NLTK processing also requires the tokenizer.
                if nltk_word_tokenize_func:
                    self._use_nltk_tokenizer = True
                    print("TextProcessor: Using NLTK for tokenization and lemmatization.")
                else:  # Should not happen if NLTK_FULLY_AVAILABLE is true
                    print("TextProcessor: NLTK Lemmatizer available, but tokenizer missing. Using basic tokenization.")
            except Exception as e:
                print(f"TextProcessor: Failed to init NLTK lemmatizer ({e}), falling back for lemmatization.")
                self.lemmatizer_instance = None
        elif nltk_word_tokenize_func:  # Only the tokenizer is available.
            self._use_nltk_tokenizer = True
            print("TextProcessor: Using NLTK for tokenization, basic lemmatization.")
        else:
            print("TextProcessor: Using basic tokenization and lemmatization (fallback).")

    def get_lemmatized_tokens(self, text: str) -> List[str]:
        """Lowercase, tokenize, lemmatize, then keep only alphanumeric tokens."""
        lowered = text.lower()
        if self._use_nltk_tokenizer and nltk_word_tokenize_func:
            try:
                tokens = nltk_word_tokenize_func(lowered)
            except Exception as e:
                print(f"NLTK tokenization failed during use: {e}. Falling back.")
                tokens = lowered.split()
        else:
            tokens = lowered.split()
        if self.lemmatizer_instance:
            try:
                tokens = [self.lemmatizer_instance.lemmatize(tok) for tok in tokens]
            except Exception as e:
                # On failure, `tokens` keeps the raw (unlemmatized) values.
                print(f"NLTK lemmatization failed during use: {e}. Falling back.")
        return [tok for tok in tokens if tok.isalnum() and tok]
class MarkovChain:
    """Transition-count model over token states.

    ``transitions[a][b]`` is the number of observed ``a -> b`` transitions;
    ``state_totals[a]`` caches the total outgoing count from ``a``.
    """

    def __init__(self):
        self.transitions: Dict[str, Dict[str, int]] = collections.defaultdict(lambda: collections.defaultdict(int))
        self.state_totals: Dict[str, int] = collections.defaultdict(int)

    def add_transition(self, from_state: str, to_state: str):
        """Record one observed transition from_state -> to_state."""
        self.transitions[from_state][to_state] += 1
        self.state_totals[from_state] += 1

    def get_next_state(self, current_state: str) -> Optional[NarseseTerm]:
        """Sample a successor of current_state weighted by observed counts.

        Returns None when the state is unknown or has no outgoing edges.
        """
        successors = self.transitions.get(current_state)
        if not successors:
            return None
        if self.state_totals.get(current_state, 0) == 0:
            return None
        candidates = list(successors.keys())
        if not candidates:
            return None
        counts = [successors[s] for s in candidates]
        return NarseseTerm(random.choices(candidates, weights=counts, k=1)[0])

    def get_transition_belief(self, from_state: str, to_state: str) -> Optional[NarseseStatement]:
        """Build a belief whose truth value reflects the observed counts.

        frequency = count / total outgoing; confidence grows with count via
        1 - 1/(1 + count).  Returns None when the transition was never seen.
        """
        count = self.transitions.get(from_state, {}).get(to_state)
        if count is None:
            return None
        total = self.state_totals.get(from_state)
        if not total:
            return None
        return NarseseStatement(
            subject=NarseseTerm(from_state),
            term_object=NarseseTerm(to_state),
            frequency=float(count) / float(total),
            confidence=1.0 - (1.0 / (1.0 + count)),
        )

    def get_known_states(self) -> Set[str]:
        """Return every state that has at least one outgoing transition."""
        return set(self.transitions.keys())

    def print_model(self):
        """Dump the full transition table with empirical probabilities."""
        print("\n--- Markov Chain Model (Compiled Knowledge) ---")
        for source, targets in self.transitions.items():
            total = self.state_totals.get(source, 0)
            print(f"From '{source}' (total {total}):")
            for target, count in targets.items():
                prob = (float(count) / float(total)) if total > 0 else 0.0
                print(f" -> '{target}': {count} times (prob: {prob:.3f})")
class CircularBeliefBuffer:
    """Fixed-capacity FIFO of beliefs; the oldest entry is evicted when full."""

    def __init__(self, capacity: int):
        self.capacity = capacity
        self.buffer: collections.deque[NarseseStatement] = collections.deque()
        self.garbage_collected_count = 0  # running total of evicted beliefs

    def add(self, statement: NarseseStatement):
        """Append a belief, evicting (and counting) the oldest when at capacity."""
        evicted: Optional[NarseseStatement] = None
        if len(self.buffer) >= self.capacity:
            evicted = self.buffer.popleft()
            self.garbage_collected_count += 1
        self.buffer.append(statement)
        eviction_info = f"Evicted: {evicted}." if evicted else ""
        print(f"BUFFER: Added: {statement}. {eviction_info} GC Count: {self.garbage_collected_count}")

    def promote(self, statement: NarseseStatement) -> bool:
        """Move an existing belief to the newest slot; False if not present."""
        if statement not in self.buffer:
            return False
        self.buffer.remove(statement)
        self.buffer.append(statement)
        print(f"BUFFER: Promoted {statement} to forefront.")
        return True

    def get_forefront_knowledge(self) -> List[NarseseStatement]:
        """Return the buffered beliefs ordered newest-first."""
        return list(reversed(self.buffer))

    def display(self):
        """Print the buffer contents, newest belief first."""
        print("\n--- NARS Router (Circular Belief Buffer State - Newest First) ---")
        if not self.buffer:
            print("Buffer is empty.")
            return
        for rank, belief in enumerate(self.get_forefront_knowledge(), start=1):
            print(f"{rank}. {belief}")
        print("-------------------------------------------------------------")
class TruffleCompiledNARSMarkov:
    """Glues compilation (text -> Markov model) to a production run.

    The Markov chain acts as the "compiled" knowledge; each production step
    samples a transition, turns it into a NarseseStatement belief, and routes
    it through the circular belief buffer.
    """

    def __init__(self):
        self.markov_chain = MarkovChain()
        self.belief_buffer = CircularBeliefBuffer(BUFFER_CAPACITY)
        self.text_processor = TextProcessor()
        # State the production system is currently "at" in the chain.
        self.current_production_state: Optional[NarseseTerm] = None

    def compile_source(self, text: str):
        """Tokenize `text` and record transitions MARKOV_CHAIN_ORDER tokens apart."""
        print("\n--- COMPILATION PHASE ---")
        tokens = self.text_processor.get_lemmatized_tokens(text)
        print(f"Lemmatized Tokens: {tokens}")
        if len(tokens) < MARKOV_CHAIN_ORDER + 1:
            print(f"Not enough tokens to build Markov chain of order {MARKOV_CHAIN_ORDER}.")
            return
        for i in range(len(tokens) - MARKOV_CHAIN_ORDER):
            self.markov_chain.add_transition(tokens[i], tokens[i + MARKOV_CHAIN_ORDER])
        self.markov_chain.print_model()

    def run_productions(self, start_token: Optional[str] = None, max_steps: int = MAX_PRODUCTIONS_TO_RUN):
        """Fire up to `max_steps` productions, restarting on dead-end states.

        Args:
            start_token: Preferred starting state; a random known state is
                used when it is None or not in the model.
            max_steps: Maximum number of production firings.
        """
        print("\n--- PRODUCTION SYSTEM EXECUTION PHASE ---")
        known_states = self.markov_chain.get_known_states()
        if not known_states:
            print("Markov chain is empty. Cannot run productions.")
            return
        if start_token and start_token in known_states:
            self.current_production_state = NarseseTerm(start_token)
        else:
            # known_states is non-empty here (checked above), so random.choice
            # always succeeds and yields a truthy NarseseTerm.  (The original
            # re-checked emptiness and then the truthiness of the chosen term;
            # both branches were unreachable and have been removed.)
            self.current_production_state = NarseseTerm(random.choice(list(known_states)))
        print(f"Starting production with initial state: {self.current_production_state.name}")
        for i in range(max_steps):
            current_state_name = self.current_production_state.name
            next_state_term = self.markov_chain.get_next_state(current_state_name)
            if next_state_term is None:
                # Dead end: restart from another known state if one exists.
                print(f"Production halted: No next state from '{current_state_name}'.")
                remaining_states = known_states - {current_state_name}
                if remaining_states:
                    self.current_production_state = NarseseTerm(random.choice(list(remaining_states)))
                    print(f"Restarting production from new random state: {self.current_production_state.name}")
                    continue
                print("No other states to transition to. Stopping.")
                break
            production_output_belief = self.markov_chain.get_transition_belief(current_state_name, next_state_term.name)
            if production_output_belief is None:
                # Should not occur: get_next_state only returns observed targets.
                print(f"Production Error: Could not form belief for {current_state_name} -> {next_state_term.name if next_state_term else 'None'}")
                break
            print(f"\n[Step {i+1}] Fired Production: {production_output_belief}")
            self.belief_buffer.add(production_output_belief)
            self.belief_buffer.display()
            # Advance to the sampled successor state.
            self.current_production_state = production_output_belief.term_object
        print("\n--- Production run finished ---")
def main_script_logic():
    """Build the demo system, compile the sample text, and run productions."""
    print("=== Python Truffle-inspired NARS Markov Production System Demo ===")
    sample_text = """
The quick brown fox jumps over the lazy dog.
The lazy dog barks. The fox runs away.
A dog is a man's best friend. A fox is a wild animal.
The dog and fox are animals. Animals live in the wild or with man.
The quick fox is quick. The lazy dog is lazy.
""".strip()
    nars_system = TruffleCompiledNARSMarkov()
    nars_system.compile_source(sample_text)

    # Prefer "dog" as the seed state; otherwise fall back to any known state.
    known = nars_system.markov_chain.get_known_states()
    chosen_start = None
    if "dog" in known:
        chosen_start = "dog"
    elif known:
        chosen_start = list(known)[0]

    if chosen_start:
        nars_system.run_productions(start_token=chosen_start, max_steps=MAX_PRODUCTIONS_TO_RUN)
    else:
        print("Cannot start productions, no suitable start token found or Markov model is empty.")

    print("\n--- Final Belief Buffer State ---")
    nars_system.belief_buffer.display()
    print("\n=== Demo Finished ===")
# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main_script_logic()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment