Created
June 3, 2025 15:27
-
-
Save jnorthrup/e803527c45e41b2eb2f13e24c9448b0c to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import collections
import random
from dataclasses import dataclass
from typing import List, Dict, Optional, Set, Tuple

# Simplified NLTK setup: probe for the optional NLTK tokenizer/lemmatizer and
# record what is actually usable, so the rest of the file can degrade
# gracefully to whitespace tokenization / identity lemmatization.
NLTK_FULLY_AVAILABLE = False
nltk_word_tokenize_func = None  # Renamed to avoid conflict if nltk itself is imported
nltk_WordNetLemmatizer_class = None  # Renamed
try:
    import nltk
    # Check for 'punkt' for tokenization.
    try:
        nltk.data.find('tokenizers/punkt')
        from nltk.tokenize import word_tokenize as imported_wt
        nltk_word_tokenize_func = imported_wt
    # Narrowed from `except Exception`: nltk.data.find raises LookupError when
    # the resource is missing, and the from-import raises ImportError — which
    # is exactly what the message describes. Anything else should propagate.
    except (LookupError, ImportError):
        print("NLTK 'punkt' (tokenizer) not found or import failed. Basic tokenization will be used.")
        nltk_word_tokenize_func = None
    # Check for 'wordnet' and 'omw-1.4' for lemmatization.
    try:
        nltk.data.find('corpora/wordnet')
        nltk.data.find('corpora/omw-1.4')
        from nltk.stem import WordNetLemmatizer as imported_wnl
        nltk_WordNetLemmatizer_class = imported_wnl
    except (LookupError, ImportError):
        print("NLTK 'wordnet'/'omw-1.4' (lemmatizer) not found or import failed. Basic lemmatization will be used.")
        nltk_WordNetLemmatizer_class = None
    if nltk_word_tokenize_func and nltk_WordNetLemmatizer_class:
        NLTK_FULLY_AVAILABLE = True
        print("NLTK tokenizer and lemmatizer appear to be available.")
    elif nltk_word_tokenize_func:
        print("NLTK tokenizer available; basic lemmatization will be used.")
    elif nltk_WordNetLemmatizer_class:
        print("NLTK lemmatizer available; basic tokenization will be used.")
    else:
        # `import nltk` succeeded if execution reached this point, so the
        # module is always bound here; the former `'nltk' in globals()` guard
        # and its else-branch were unreachable dead code and were removed.
        print("NLTK module imported, but resources for tokenization/lemmatization are not available. Full fallback.")
except ImportError:
    print("NLTK module not found. Full fallback (basic tokenization and lemmatization).")

# --- Configuration ---
MARKOV_CHAIN_ORDER = 1                # order of the Markov chain (tokens of context)
BUFFER_CAPACITY = 10                  # max beliefs held by the circular buffer
MAX_PRODUCTIONS_TO_RUN = 15           # production-system step budget per run
TRUTH_VALUE_SCALING_FACTOR = 100.0    # display scaling applied to frequency in __str__
@dataclass(frozen=True)
class NarseseTerm:
    """An atomic Narsese term, identified solely by its name.

    Frozen so instances are hashable and can key dicts/sets.
    """

    name: str

    def __str__(self) -> str:
        return self.name
@dataclass
class NarseseStatement:
    """A Narsese statement (subject predicate object) with NAL truth values.

    Identity (equality and hashing) is determined only by the
    (subject, predicate, object) triple; frequency and confidence are
    truth-value payload and do not participate in identity.
    """

    subject: NarseseTerm
    term_object: NarseseTerm
    predicate: str = "==>"
    frequency: float = 0.0
    confidence: float = 0.0

    def __str__(self) -> str:
        # Frequency is scaled (x100) purely for display.
        scaled_freq = self.frequency * TRUTH_VALUE_SCALING_FACTOR
        return (
            f"({self.subject.name} {self.predicate} {self.term_object.name})"
            f" {{f={scaled_freq:.2f}, c={self.confidence:.2f}}}"
        )

    def __hash__(self):
        return hash((self.subject, self.predicate, self.term_object))

    def __eq__(self, other):
        if not isinstance(other, NarseseStatement):
            return NotImplemented
        mine = (self.subject, self.predicate, self.term_object)
        theirs = (other.subject, other.predicate, other.term_object)
        return mine == theirs
class TextProcessor:
    """Lowercases, tokenizes, lemmatizes text, preferring NLTK when available.

    Falls back to whitespace splitting and identity lemmatization whenever the
    NLTK tokenizer/lemmatizer (or their data resources) are missing or fail.
    """

    def __init__(self):
        self.lemmatizer_instance = None
        self._use_nltk_tokenizer = False
        if NLTK_FULLY_AVAILABLE and nltk_WordNetLemmatizer_class:
            try:
                self.lemmatizer_instance = nltk_WordNetLemmatizer_class()
                # Full NLTK processing also requires the tokenizer.
                if nltk_word_tokenize_func:
                    self._use_nltk_tokenizer = True
                    print("TextProcessor: Using NLTK for tokenization and lemmatization.")
                else:
                    # Defensive: NLTK_FULLY_AVAILABLE implies the tokenizer exists.
                    print("TextProcessor: NLTK Lemmatizer available, but tokenizer missing. Using basic tokenization.")
            except Exception as e:
                print(f"TextProcessor: Failed to init NLTK lemmatizer ({e}), falling back for lemmatization.")
                self.lemmatizer_instance = None
        elif nltk_word_tokenize_func:
            # Only the tokenizer is usable; lemmatization stays basic.
            self._use_nltk_tokenizer = True
            print("TextProcessor: Using NLTK for tokenization, basic lemmatization.")
        else:
            print("TextProcessor: Using basic tokenization and lemmatization (fallback).")

    def _tokenize(self, lowered_text: str) -> List[str]:
        # Prefer the NLTK tokenizer; any runtime failure degrades to split().
        if self._use_nltk_tokenizer and nltk_word_tokenize_func:
            try:
                return nltk_word_tokenize_func(lowered_text)
            except Exception as e:
                print(f"NLTK tokenization failed during use: {e}. Falling back.")
        return lowered_text.split()

    def _lemmatize(self, tokens: List[str]) -> List[str]:
        # Lemmatize per token when a lemmatizer exists; otherwise copy through.
        if self.lemmatizer_instance:
            try:
                return [self.lemmatizer_instance.lemmatize(tok) for tok in tokens]
            except Exception as e:
                print(f"NLTK lemmatization failed during use: {e}. Falling back.")
        return list(tokens)

    def get_lemmatized_tokens(self, text: str) -> List[str]:
        """Return lowercased, lemmatized, purely-alphanumeric tokens of `text`."""
        lowered = text.lower()
        lemmas = self._lemmatize(self._tokenize(lowered))
        # Drop punctuation-bearing tokens; isalnum() is False for "" anyway.
        return [tok for tok in lemmas if tok.isalnum() and tok]
class MarkovChain:
    """First-order Markov transition model over string states.

    Stores raw transition counts plus per-state totals, from which
    probabilities and NARS-style truth values are derived on demand.
    """

    def __init__(self):
        # state -> {successor state -> observation count}
        self.transitions: Dict[str, Dict[str, int]] = collections.defaultdict(lambda: collections.defaultdict(int))
        # state -> total outgoing observation count
        self.state_totals: Dict[str, int] = collections.defaultdict(int)

    def add_transition(self, from_state: str, to_state: str):
        """Record one observed from_state -> to_state transition."""
        self.transitions[from_state][to_state] += 1
        self.state_totals[from_state] += 1

    def get_next_state(self, current_state: str) -> Optional[NarseseTerm]:
        """Sample a successor of `current_state`, weighted by observed counts."""
        successors = self.transitions.get(current_state)
        if not successors:
            return None
        if self.state_totals.get(current_state, 0) == 0:
            return None
        candidates = list(successors)
        counts = [successors[s] for s in candidates]
        if not candidates:
            return None
        sampled = random.choices(candidates, weights=counts, k=1)[0]
        return NarseseTerm(sampled)

    def get_transition_belief(self, from_state: str, to_state: str) -> Optional[NarseseStatement]:
        """Build a NarseseStatement whose truth values reflect the counts."""
        count = self.transitions.get(from_state, {}).get(to_state)
        if count is None:
            return None
        total_outgoing = self.state_totals.get(from_state)
        if not total_outgoing:
            return None
        return NarseseStatement(
            subject=NarseseTerm(from_state),
            term_object=NarseseTerm(to_state),
            # Relative frequency of this successor among all outgoing transitions.
            frequency=count / total_outgoing,
            # Confidence grows toward 1 with evidence: c = count / (count + 1).
            confidence=1.0 - (1.0 / (1.0 + count)),
        )

    def get_known_states(self) -> Set[str]:
        """States with at least one recorded outgoing transition."""
        return set(self.transitions)

    def print_model(self):
        """Dump every transition with its count and relative probability."""
        print("\n--- Markov Chain Model (Compiled Knowledge) ---")
        for source, successor_counts in self.transitions.items():
            source_total = self.state_totals.get(source, 0)
            print(f"From '{source}' (total {source_total}):")
            for successor, count in successor_counts.items():
                probability = count / source_total if source_total > 0 else 0.0
                print(f" -> '{successor}': {count} times (prob: {probability:.3f})")
class CircularBeliefBuffer:
    """Fixed-capacity FIFO of beliefs; the oldest entry is garbage-collected.

    The right end of the deque is the "forefront" (newest / most salient);
    eviction pops from the left when capacity is exceeded.
    """

    def __init__(self, capacity: int):
        self.capacity = capacity
        self.buffer: collections.deque[NarseseStatement] = collections.deque()
        # Running count of beliefs evicted to make room.
        self.garbage_collected_count = 0

    def add(self, statement: NarseseStatement):
        """Append a belief, evicting the oldest one first when at capacity."""
        evicted: Optional[NarseseStatement] = None
        if len(self.buffer) >= self.capacity:
            evicted = self.buffer.popleft()
            self.garbage_collected_count += 1
        self.buffer.append(statement)
        eviction_info = f"Evicted: {evicted}." if evicted else ""
        print(f"BUFFER: Added: {statement}. {eviction_info} GC Count: {self.garbage_collected_count}")

    def promote(self, statement: NarseseStatement) -> bool:
        """Move an existing belief to the forefront; return False if absent."""
        if statement not in self.buffer:
            return False
        self.buffer.remove(statement)
        self.buffer.append(statement)
        print(f"BUFFER: Promoted {statement} to forefront.")
        return True

    def get_forefront_knowledge(self) -> List[NarseseStatement]:
        """Beliefs ordered newest-first."""
        return list(reversed(self.buffer))

    def display(self):
        """Print the buffer contents, newest first."""
        print("\n--- NARS Router (Circular Belief Buffer State - Newest First) ---")
        if not self.buffer:
            print("Buffer is empty.")
            return
        for rank, belief in enumerate(self.get_forefront_knowledge(), start=1):
            print(f"{rank}. {belief}")
        print("-------------------------------------------------------------")
class TruffleCompiledNARSMarkov:
    """Pipeline facade: compiles text into a Markov model, then runs it as a
    production system whose fired transitions become NARS beliefs in a
    bounded circular buffer.
    """

    def __init__(self):
        self.markov_chain = MarkovChain()
        self.belief_buffer = CircularBeliefBuffer(BUFFER_CAPACITY)
        self.text_processor = TextProcessor()
        # State the production loop is currently at; None until a run starts.
        self.current_production_state: Optional[NarseseTerm] = None

    def compile_source(self, text: str):
        """Tokenize/lemmatize `text` and record order-MARKOV_CHAIN_ORDER transitions."""
        print("\n--- COMPILATION PHASE ---")
        tokens = self.text_processor.get_lemmatized_tokens(text)
        print(f"Lemmatized Tokens: {tokens}")
        # Need at least one (state, successor) pair to record anything.
        if len(tokens) < MARKOV_CHAIN_ORDER + 1:
            print(f"Not enough tokens to build Markov chain of order {MARKOV_CHAIN_ORDER}.")
            return
        for i in range(len(tokens) - MARKOV_CHAIN_ORDER):
            self.markov_chain.add_transition(tokens[i], tokens[i + MARKOV_CHAIN_ORDER])
        self.markov_chain.print_model()

    def run_productions(self, start_token: Optional[str] = None, max_steps: int = MAX_PRODUCTIONS_TO_RUN):
        """Walk the Markov chain for up to `max_steps`, emitting one belief per step.

        A dead-end state triggers a restart from a random different state;
        the run stops early only when no alternative state exists.
        """
        print("\n--- PRODUCTION SYSTEM EXECUTION PHASE ---")
        known_states = self.markov_chain.get_known_states()
        if not known_states:
            print("Markov chain is empty. Cannot run productions.")
            return
        if start_token and start_token in known_states:
            self.current_production_state = NarseseTerm(start_token)
        else:
            # known_states is non-empty here (checked above), so a random pick
            # is safe; the former inner `if not known_states` re-check and the
            # truthiness check on the freshly built NarseseTerm (a dataclass
            # instance, which is always truthy) were dead code and removed.
            self.current_production_state = NarseseTerm(random.choice(list(known_states)))
        print(f"Starting production with initial state: {self.current_production_state.name}")
        for i in range(max_steps):
            current_state_name = self.current_production_state.name
            next_state_term = self.markov_chain.get_next_state(current_state_name)
            if next_state_term is None:
                print(f"Production halted: No next state from '{current_state_name}'.")
                remaining_states = known_states - {current_state_name}
                if remaining_states:
                    self.current_production_state = NarseseTerm(random.choice(list(remaining_states)))
                    print(f"Restarting production from new random state: {self.current_production_state.name}")
                    continue  # NOTE: a restart still consumes this step of the budget.
                print("No other states to transition to. Stopping.")
                break
            production_output_belief = self.markov_chain.get_transition_belief(current_state_name, next_state_term.name)
            if production_output_belief is None:
                # Should not happen: get_next_state only returns observed successors.
                print(f"Production Error: Could not form belief for {current_state_name} -> {next_state_term.name if next_state_term else 'None'}")
                break
            print(f"\n[Step {i+1}] Fired Production: {production_output_belief}")
            self.belief_buffer.add(production_output_belief)
            self.belief_buffer.display()
            # Chain onward from the object of the fired belief.
            self.current_production_state = production_output_belief.term_object
        print("\n--- Production run finished ---")
def main_script_logic():
    """Demo driver: compile sample text, then run the production system."""
    print("=== Python Truffle-inspired NARS Markov Production System Demo ===")
    sample_text = """
    The quick brown fox jumps over the lazy dog.
    The lazy dog barks. The fox runs away.
    A dog is a man's best friend. A fox is a wild animal.
    The dog and fox are animals. Animals live in the wild or with man.
    The quick fox is quick. The lazy dog is lazy.
    """.strip()
    nars_system = TruffleCompiledNARSMarkov()
    nars_system.compile_source(sample_text)

    # Prefer "dog" as the entry state; otherwise fall back to any known state.
    known = nars_system.markov_chain.get_known_states()
    if "dog" in known:
        chosen_start = "dog"
    elif known:
        chosen_start = next(iter(known))
    else:
        chosen_start = None

    if chosen_start:
        nars_system.run_productions(start_token=chosen_start, max_steps=MAX_PRODUCTIONS_TO_RUN)
    else:
        print("Cannot start productions, no suitable start token found or Markov model is empty.")
    print("\n--- Final Belief Buffer State ---")
    nars_system.belief_buffer.display()
    print("\n=== Demo Finished ===")


if __name__ == "__main__":
    main_script_logic()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment