Created
June 3, 2025 15:27
-
-
Save jnorthrup/e803527c45e41b2eb2f13e24c9448b0c to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import collections
import random
from dataclasses import dataclass
from typing import List, Dict, Optional, Set, Tuple

# Simplified NLTK setup: probe for the optional NLTK tokenizer/lemmatizer and
# record what is actually usable, so the rest of the file can degrade
# gracefully to whitespace tokenization / identity lemmatization.
NLTK_FULLY_AVAILABLE = False
nltk_word_tokenize_func = None  # Renamed to avoid conflict if nltk itself is imported
nltk_WordNetLemmatizer_class = None  # Renamed
try:
    import nltk
    # Check for 'punkt' for tokenization.
    try:
        nltk.data.find('tokenizers/punkt')
        from nltk.tokenize import word_tokenize as imported_wt
        nltk_word_tokenize_func = imported_wt
    # Narrowed from `except Exception`: nltk.data.find raises LookupError when
    # the resource is missing, and the from-import raises ImportError — which
    # is exactly what the message describes. Anything else should propagate.
    except (LookupError, ImportError):
        print("NLTK 'punkt' (tokenizer) not found or import failed. Basic tokenization will be used.")
        nltk_word_tokenize_func = None
    # Check for 'wordnet' and 'omw-1.4' for lemmatization.
    try:
        nltk.data.find('corpora/wordnet')
        nltk.data.find('corpora/omw-1.4')
        from nltk.stem import WordNetLemmatizer as imported_wnl
        nltk_WordNetLemmatizer_class = imported_wnl
    except (LookupError, ImportError):
        print("NLTK 'wordnet'/'omw-1.4' (lemmatizer) not found or import failed. Basic lemmatization will be used.")
        nltk_WordNetLemmatizer_class = None
    if nltk_word_tokenize_func and nltk_WordNetLemmatizer_class:
        NLTK_FULLY_AVAILABLE = True
        print("NLTK tokenizer and lemmatizer appear to be available.")
    elif nltk_word_tokenize_func:
        print("NLTK tokenizer available; basic lemmatization will be used.")
    elif nltk_WordNetLemmatizer_class:
        print("NLTK lemmatizer available; basic tokenization will be used.")
    else:
        # `import nltk` succeeded if execution reached this point, so the
        # module is always bound here; the former `'nltk' in globals()` guard
        # and its else-branch were unreachable dead code and were removed.
        print("NLTK module imported, but resources for tokenization/lemmatization are not available. Full fallback.")
except ImportError:
    print("NLTK module not found. Full fallback (basic tokenization and lemmatization).")

# --- Configuration ---
MARKOV_CHAIN_ORDER = 1                # order of the Markov chain (tokens of context)
BUFFER_CAPACITY = 10                  # max beliefs held by the circular buffer
MAX_PRODUCTIONS_TO_RUN = 15           # production-system step budget per run
TRUTH_VALUE_SCALING_FACTOR = 100.0    # display scaling applied to frequency in __str__
@dataclass(frozen=True)
class NarseseTerm:
    """An atomic Narsese term, identified solely by its name.

    Frozen so instances are hashable and can key dicts/sets.
    """

    name: str

    def __str__(self) -> str:
        return self.name
@dataclass
class NarseseStatement:
    """A Narsese statement (subject predicate object) with NAL truth values.

    Identity (equality and hashing) is determined only by the
    (subject, predicate, object) triple; frequency and confidence are
    truth-value payload and do not participate in identity.
    """

    subject: NarseseTerm
    term_object: NarseseTerm
    predicate: str = "==>"
    frequency: float = 0.0
    confidence: float = 0.0

    def __str__(self) -> str:
        # Frequency is scaled (x100) purely for display.
        scaled_freq = self.frequency * TRUTH_VALUE_SCALING_FACTOR
        return (
            f"({self.subject.name} {self.predicate} {self.term_object.name})"
            f" {{f={scaled_freq:.2f}, c={self.confidence:.2f}}}"
        )

    def __hash__(self):
        return hash((self.subject, self.predicate, self.term_object))

    def __eq__(self, other):
        if not isinstance(other, NarseseStatement):
            return NotImplemented
        mine = (self.subject, self.predicate, self.term_object)
        theirs = (other.subject, other.predicate, other.term_object)
        return mine == theirs
class TextProcessor:
    """Lowercases, tokenizes, lemmatizes text, preferring NLTK when available.

    Falls back to whitespace splitting and identity lemmatization whenever the
    NLTK tokenizer/lemmatizer (or their data resources) are missing or fail.
    """

    def __init__(self):
        self.lemmatizer_instance = None
        self._use_nltk_tokenizer = False
        if NLTK_FULLY_AVAILABLE and nltk_WordNetLemmatizer_class:
            try:
                self.lemmatizer_instance = nltk_WordNetLemmatizer_class()
                # Full NLTK processing also requires the tokenizer.
                if nltk_word_tokenize_func:
                    self._use_nltk_tokenizer = True
                    print("TextProcessor: Using NLTK for tokenization and lemmatization.")
                else:
                    # Defensive: NLTK_FULLY_AVAILABLE implies the tokenizer exists.
                    print("TextProcessor: NLTK Lemmatizer available, but tokenizer missing. Using basic tokenization.")
            except Exception as e:
                print(f"TextProcessor: Failed to init NLTK lemmatizer ({e}), falling back for lemmatization.")
                self.lemmatizer_instance = None
        elif nltk_word_tokenize_func:
            # Only the tokenizer is usable; lemmatization stays basic.
            self._use_nltk_tokenizer = True
            print("TextProcessor: Using NLTK for tokenization, basic lemmatization.")
        else:
            print("TextProcessor: Using basic tokenization and lemmatization (fallback).")

    def _tokenize(self, lowered_text: str) -> List[str]:
        # Prefer the NLTK tokenizer; any runtime failure degrades to split().
        if self._use_nltk_tokenizer and nltk_word_tokenize_func:
            try:
                return nltk_word_tokenize_func(lowered_text)
            except Exception as e:
                print(f"NLTK tokenization failed during use: {e}. Falling back.")
        return lowered_text.split()

    def _lemmatize(self, tokens: List[str]) -> List[str]:
        # Lemmatize per token when a lemmatizer exists; otherwise copy through.
        if self.lemmatizer_instance:
            try:
                return [self.lemmatizer_instance.lemmatize(tok) for tok in tokens]
            except Exception as e:
                print(f"NLTK lemmatization failed during use: {e}. Falling back.")
        return list(tokens)

    def get_lemmatized_tokens(self, text: str) -> List[str]:
        """Return lowercased, lemmatized, purely-alphanumeric tokens of `text`."""
        lowered = text.lower()
        lemmas = self._lemmatize(self._tokenize(lowered))
        # Drop punctuation-bearing tokens; isalnum() is False for "" anyway.
        return [tok for tok in lemmas if tok.isalnum() and tok]
class MarkovChain:
    """First-order Markov transition model over string states.

    Stores raw transition counts plus per-state totals, from which
    probabilities and NARS-style truth values are derived on demand.
    """

    def __init__(self):
        # state -> {successor state -> observation count}
        self.transitions: Dict[str, Dict[str, int]] = collections.defaultdict(lambda: collections.defaultdict(int))
        # state -> total outgoing observation count
        self.state_totals: Dict[str, int] = collections.defaultdict(int)

    def add_transition(self, from_state: str, to_state: str):
        """Record one observed from_state -> to_state transition."""
        self.transitions[from_state][to_state] += 1
        self.state_totals[from_state] += 1

    def get_next_state(self, current_state: str) -> Optional[NarseseTerm]:
        """Sample a successor of `current_state`, weighted by observed counts."""
        successors = self.transitions.get(current_state)
        if not successors:
            return None
        if self.state_totals.get(current_state, 0) == 0:
            return None
        candidates = list(successors)
        counts = [successors[s] for s in candidates]
        if not candidates:
            return None
        sampled = random.choices(candidates, weights=counts, k=1)[0]
        return NarseseTerm(sampled)

    def get_transition_belief(self, from_state: str, to_state: str) -> Optional[NarseseStatement]:
        """Build a NarseseStatement whose truth values reflect the counts."""
        count = self.transitions.get(from_state, {}).get(to_state)
        if count is None:
            return None
        total_outgoing = self.state_totals.get(from_state)
        if not total_outgoing:
            return None
        return NarseseStatement(
            subject=NarseseTerm(from_state),
            term_object=NarseseTerm(to_state),
            # Relative frequency of this successor among all outgoing transitions.
            frequency=count / total_outgoing,
            # Confidence grows toward 1 with evidence: c = count / (count + 1).
            confidence=1.0 - (1.0 / (1.0 + count)),
        )

    def get_known_states(self) -> Set[str]:
        """States with at least one recorded outgoing transition."""
        return set(self.transitions)

    def print_model(self):
        """Dump every transition with its count and relative probability."""
        print("\n--- Markov Chain Model (Compiled Knowledge) ---")
        for source, successor_counts in self.transitions.items():
            source_total = self.state_totals.get(source, 0)
            print(f"From '{source}' (total {source_total}):")
            for successor, count in successor_counts.items():
                probability = count / source_total if source_total > 0 else 0.0
                print(f" -> '{successor}': {count} times (prob: {probability:.3f})")
class CircularBeliefBuffer:
    """Fixed-capacity FIFO of beliefs; the oldest entry is garbage-collected.

    The right end of the deque is the "forefront" (newest / most salient);
    eviction pops from the left when capacity is exceeded.
    """

    def __init__(self, capacity: int):
        self.capacity = capacity
        self.buffer: collections.deque[NarseseStatement] = collections.deque()
        # Running count of beliefs evicted to make room.
        self.garbage_collected_count = 0

    def add(self, statement: NarseseStatement):
        """Append a belief, evicting the oldest one first when at capacity."""
        evicted: Optional[NarseseStatement] = None
        if len(self.buffer) >= self.capacity:
            evicted = self.buffer.popleft()
            self.garbage_collected_count += 1
        self.buffer.append(statement)
        eviction_info = f"Evicted: {evicted}." if evicted else ""
        print(f"BUFFER: Added: {statement}. {eviction_info} GC Count: {self.garbage_collected_count}")

    def promote(self, statement: NarseseStatement) -> bool:
        """Move an existing belief to the forefront; return False if absent."""
        if statement not in self.buffer:
            return False
        self.buffer.remove(statement)
        self.buffer.append(statement)
        print(f"BUFFER: Promoted {statement} to forefront.")
        return True

    def get_forefront_knowledge(self) -> List[NarseseStatement]:
        """Beliefs ordered newest-first."""
        return list(reversed(self.buffer))

    def display(self):
        """Print the buffer contents, newest first."""
        print("\n--- NARS Router (Circular Belief Buffer State - Newest First) ---")
        if not self.buffer:
            print("Buffer is empty.")
            return
        for rank, belief in enumerate(self.get_forefront_knowledge(), start=1):
            print(f"{rank}. {belief}")
        print("-------------------------------------------------------------")
class TruffleCompiledNARSMarkov:
    """Pipeline facade: compiles text into a Markov model, then runs it as a
    production system whose fired transitions become NARS beliefs in a
    bounded circular buffer.
    """

    def __init__(self):
        self.markov_chain = MarkovChain()
        self.belief_buffer = CircularBeliefBuffer(BUFFER_CAPACITY)
        self.text_processor = TextProcessor()
        # State the production loop is currently at; None until a run starts.
        self.current_production_state: Optional[NarseseTerm] = None

    def compile_source(self, text: str):
        """Tokenize/lemmatize `text` and record order-MARKOV_CHAIN_ORDER transitions."""
        print("\n--- COMPILATION PHASE ---")
        tokens = self.text_processor.get_lemmatized_tokens(text)
        print(f"Lemmatized Tokens: {tokens}")
        # Need at least one (state, successor) pair to record anything.
        if len(tokens) < MARKOV_CHAIN_ORDER + 1:
            print(f"Not enough tokens to build Markov chain of order {MARKOV_CHAIN_ORDER}.")
            return
        for i in range(len(tokens) - MARKOV_CHAIN_ORDER):
            self.markov_chain.add_transition(tokens[i], tokens[i + MARKOV_CHAIN_ORDER])
        self.markov_chain.print_model()

    def run_productions(self, start_token: Optional[str] = None, max_steps: int = MAX_PRODUCTIONS_TO_RUN):
        """Walk the Markov chain for up to `max_steps`, emitting one belief per step.

        A dead-end state triggers a restart from a random different state;
        the run stops early only when no alternative state exists.
        """
        print("\n--- PRODUCTION SYSTEM EXECUTION PHASE ---")
        known_states = self.markov_chain.get_known_states()
        if not known_states:
            print("Markov chain is empty. Cannot run productions.")
            return
        if start_token and start_token in known_states:
            self.current_production_state = NarseseTerm(start_token)
        else:
            # known_states is non-empty here (checked above), so a random pick
            # is safe; the former inner `if not known_states` re-check and the
            # truthiness check on the freshly built NarseseTerm (a dataclass
            # instance, which is always truthy) were dead code and removed.
            self.current_production_state = NarseseTerm(random.choice(list(known_states)))
        print(f"Starting production with initial state: {self.current_production_state.name}")
        for i in range(max_steps):
            current_state_name = self.current_production_state.name
            next_state_term = self.markov_chain.get_next_state(current_state_name)
            if next_state_term is None:
                print(f"Production halted: No next state from '{current_state_name}'.")
                remaining_states = known_states - {current_state_name}
                if remaining_states:
                    self.current_production_state = NarseseTerm(random.choice(list(remaining_states)))
                    print(f"Restarting production from new random state: {self.current_production_state.name}")
                    continue  # NOTE: a restart still consumes this step of the budget.
                print("No other states to transition to. Stopping.")
                break
            production_output_belief = self.markov_chain.get_transition_belief(current_state_name, next_state_term.name)
            if production_output_belief is None:
                # Should not happen: get_next_state only returns observed successors.
                print(f"Production Error: Could not form belief for {current_state_name} -> {next_state_term.name if next_state_term else 'None'}")
                break
            print(f"\n[Step {i+1}] Fired Production: {production_output_belief}")
            self.belief_buffer.add(production_output_belief)
            self.belief_buffer.display()
            # Chain onward from the object of the fired belief.
            self.current_production_state = production_output_belief.term_object
        print("\n--- Production run finished ---")
def main_script_logic():
    """Demo driver: compile sample text, then run the production system."""
    print("=== Python Truffle-inspired NARS Markov Production System Demo ===")
    sample_text = """
    The quick brown fox jumps over the lazy dog.
    The lazy dog barks. The fox runs away.
    A dog is a man's best friend. A fox is a wild animal.
    The dog and fox are animals. Animals live in the wild or with man.
    The quick fox is quick. The lazy dog is lazy.
    """.strip()
    nars_system = TruffleCompiledNARSMarkov()
    nars_system.compile_source(sample_text)

    # Prefer "dog" as the entry state; otherwise fall back to any known state.
    known = nars_system.markov_chain.get_known_states()
    if "dog" in known:
        chosen_start = "dog"
    elif known:
        chosen_start = next(iter(known))
    else:
        chosen_start = None

    if chosen_start:
        nars_system.run_productions(start_token=chosen_start, max_steps=MAX_PRODUCTIONS_TO_RUN)
    else:
        print("Cannot start productions, no suitable start token found or Markov model is empty.")
    print("\n--- Final Belief Buffer State ---")
    nars_system.belief_buffer.display()
    print("\n=== Demo Finished ===")


if __name__ == "__main__":
    main_script_logic()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment